#! /usr/bin/env perl
# Copyright 2005-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. Rights for redistribution and usage in source and binary
# forms are granted according to the License.
# ====================================================================
#
# Version 1.1
#
# The major reason for undertaken effort was to mitigate the hazard of
# cache-timing attack. This is [currently and initially!] addressed in
# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
# 2. References to them are scheduled for L2 cache latency, meaning
# that the tables don't have to reside in L1 cache. Once again, this
# is an initial draft and one should expect more countermeasures to
# be implemented...
#
# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
# round.
#
# Even though performance was not the primary goal [on the contrary,
# extra shifts "induced" by compressed S-box and longer loop epilogue
# "induced" by scheduling for L2 have negative effect on performance],
# the code turned out to run in ~23 cycles per processed byte en-/
# decrypted with 128-bit key. This is pretty good result for code
# with mentioned qualities and UltraSPARC core. Compared to Sun C
# generated code my encrypt procedure runs just few percents faster,
# while decrypt one - whole 50% faster [yes, Sun C failed to generate
# optimal decrypt procedure]. Compared to GNU C generated code both
# procedures are more than 60% faster:-)

use strict;
use warnings;

# The last command-line argument, when present, names the output file.
# Without it the generated assembly goes to the inherited STDOUT.
my $output = pop @ARGV;
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

# Symbolic frame constants; they are emitted verbatim into the assembly
# (presumably resolved by crypto/sparc_arch.h -- confirm there).
my $frame  = "STACK_FRAME";
my $bias   = "STACK_BIAS";
# Extra bytes reserved by the inner routines' "save"; the return address
# (%i7) is parked at offset 0 of this area.
my $locals = 16;

# Sixteen accumulators, one per table lookup of a round.
my ($acc0,  $acc1,  $acc2,  $acc3)  = ("%l0", "%o0", "%o1", "%o2");
my ($acc4,  $acc5,  $acc6,  $acc7)  = ("%l1", "%o3", "%o4", "%o5");
my ($acc8,  $acc9,  $acc10, $acc11) = ("%l2", "%o7", "%g1", "%g2");
my ($acc12, $acc13, $acc14, $acc15) = ("%l3", "%g3", "%g4", "%g5");

# Two alternating sets of four state words.
my ($t0, $t1, $t2, $t3) = ("%l4", "%l5", "%l6", "%l7");
my ($s0, $s1, $s2, $s3) = ("%i0", "%i1", "%i2", "%i3");

my $tbl    = "%i4";
my $key    = "%i5";
my $rounds = "%i7";	# aliases with return address, which is off-loaded to stack

# Accumulates the complete assembly text; printed once at the very end.
my $code = '';

# Append table entries to $code.  Every 32-bit argument is emitted twice,
# as one ".long w,w" pair, so each entry occupies 8 bytes.  The round code
# fetches entries with 64-bit "ldx" and shifts them right by 8, 16 or 24
# bits before xor-ing them into the 32-bit state.
sub _data_word {
    for my $word (@_) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
    return;
}

# Assembler prologue and the head of the encryption table.
$code .= <<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif
.section	".text",#alloc,#execinstr

.align	256
AES_Te:
___
# 256 entries * 8 bytes = 2048 bytes; the byte table follows at AES_Te+2048.
_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);

# Remainder of the encryption side:
#  - the 256-byte table used by the last round (addressed as table+2048);
#  - _sparcv9_AES_encrypt, the internal worker.  It takes the four state
#    words in %i0-%i3, the table address in %i4 and the key schedule in
#    %i5, reads the round count from [key+240] and halves it because the
#    loop body is unrolled twice (32 bytes of key schedule per pass).
#    On the final pass the annulled delay slot repoints $rounds at
#    table+2048 for the byte lookups.  Results are left in %i0-%i3,
#    i.e. the caller's %o0-%o3 after "restore";
#  - AES_encrypt, the global entry point.  After its own "save" it reads
#    16 input bytes via %i0, writes 16 output bytes via %i1 and passes
#    %i2 on as the key schedule.  If either of the first two pointers is
#    not 4-byte aligned it switches to byte-wise loads/stores that
#    assemble and split the words most-significant byte first.
# Do not add text containing "fmovs" to this heredoc: such lines are
# stripped from the final output at the bottom of this script.
$code .= <<___;
	.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
	.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
	.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
	.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
	.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
	.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
	.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
	.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
	.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
	.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
	.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
	.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
	.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
	.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
	.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
	.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
	.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
	.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
	.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
	.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
	.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
	.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
	.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
	.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
	.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
	.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
	.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
	.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
	.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
	.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
	.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
	.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
.type	AES_Te,#object
.size	AES_Te,(.-AES_Te)

.align	64
.skip	16
_sparcv9_AES_encrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1			!
	ld	[$key+8],$t2
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+12],$t3
	srl	$s0,21,$acc0
	xor	$t1,$s1,$s1
	ld	[$key+16],$t0
	srl	$s1,13,$acc1			!
	xor	$t2,$s2,$s2
	ld	[$key+20],$t1
	xor	$t3,$s3,$s3
	ld	[$key+24],$t2
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	nop
.Lenc_loop:
	srl	$s2,5,$acc2			!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2		!
	srl	$s2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s0,3,$acc7			!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s3,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7		!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12			!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12		!
	sll	$s2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds		!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Lenc_last
	add	$tbl,2048,$rounds

	srlx	$acc1,8,$acc1
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
	srlx	$acc2,16,$acc2		!
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3			!
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10		!
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15		!
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
	xor	$acc14,$t3,$t3
	srl	$t1,13,$acc1
	xor	$acc15,$t3,$t3

	and	$acc0,2040,$acc0		!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t3,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4			!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t2,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4		!
	sll	$t0,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t3,13,$acc9			!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t1,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9		!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t0,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14			!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t2,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14		!

	srlx	$acc2,16,$acc2
	xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
	srlx	$acc3,24,$acc3
	xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
	srlx	$acc5,8,$acc5			!
	xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
	srlx	$acc6,16,$acc6
	xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
	srlx	$acc7,24,$acc7
	xor	$acc4,$s1,$s1
	ld	[$key+28],$t3			!
	srlx	$acc9,8,$acc9
	xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0		! prefetch te4
	srlx	$acc10,16,$acc10
	xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0		! prefetch te4
	srlx	$acc11,24,$acc11
	xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0		! prefetch te4
	srlx	$acc13,8,$acc13
	xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0		! prefetch te4
	srlx	$acc14,16,$acc14		!
	xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0		! prefetch te4
	srlx	$acc15,24,$acc15
	xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0		! prefetch te4
	srl	$s0,21,$acc0
	xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0		! prefetch te4
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0		! prefetch te4
	srl	$s1,13,$acc1			!
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ba	.Lenc_loop
	and	$acc0,2040,$acc0

.align	32
.Lenc_last:
	srlx	$acc1,8,$acc1			!
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	srlx	$acc2,16,$acc2
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2			!
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9			!
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14		!
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
	xor	$acc14,$t3,$t3
	srl	$t1,16,$acc1			!
	xor	$acc15,$t3,$t3

	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t2,16,$acc5			!
	and	$t3,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8			!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t3,16,$acc9
	and	$t0,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10			!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t0,16,$acc13
	and	$t1,255,$acc11
	ldub	[$rounds+$acc10],$acc10	!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t2,255,$acc15
	ldub	[$rounds+$acc14],$acc14	!

	sll	$acc0,24,$acc0
	xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
	sll	$acc1,16,$acc1
	xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
	sll	$acc2,8,$acc2			!
	xor	$acc1,$s0,$s0
	sll	$acc4,24,$acc4
	xor	$acc2,$s0,$s0
	sll	$acc5,16,$acc5
	xor	$acc7,$s1,$s1
	sll	$acc6,8,$acc6
	xor	$acc4,$s1,$s1
	sll	$acc8,24,$acc8			!
	xor	$acc5,$s1,$s1
	sll	$acc9,16,$acc9
	xor	$acc11,$s2,$s2
	sll	$acc10,8,$acc10
	xor	$acc6,$s1,$s1
	sll	$acc12,24,$acc12
	xor	$acc8,$s2,$s2
	sll	$acc13,16,$acc13		!
	xor	$acc9,$s2,$s2
	sll	$acc14,8,$acc14
	xor	$acc10,$s2,$s2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3

	ret
	restore
.type	_sparcv9_AES_encrypt,#function
.size	_sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)

.align	32
.globl	AES_encrypt
AES_encrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_enc
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3

1:	call	.+8
	add	%o7,AES_Te-1b,%o4
	call	_sparcv9_AES_encrypt
	mov	%i2,%o5

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_enc:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2

	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2

	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3

1:	call	.+8
	add	%o7,AES_Te-1b,%o4
	call	_sparcv9_AES_encrypt
	mov	%i2,%o5

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]

	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_encrypt,#function
.size	AES_encrypt,(.-AES_encrypt)

___

# Head of the decryption table; same layout as AES_Te.
$code .= <<___;
.align	256
AES_Td:
___
_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);

# Remainder of the decryption side; it mirrors the encryption side:
# the 256-byte last-round table at AES_Td+2048, the internal worker
# _sparcv9_AES_decrypt (same register interface as the encrypt worker,
# different state-word selection per lookup) and the global entry point
# AES_decrypt with the same aligned/unaligned split as AES_encrypt.
$code .= <<___;
	.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
.type	AES_Td,#object
.size	AES_Td,(.-AES_Td)

.align	64
.skip	16
_sparcv9_AES_decrypt:
	save	%sp,-$frame-$locals,%sp
	stx	%i7,[%sp+$bias+$frame+0]	! off-load return address
	ld	[$key+240],$rounds
	ld	[$key+0],$t0
	ld	[$key+4],$t1			!
	ld	[$key+8],$t2
	ld	[$key+12],$t3
	srl	$rounds,1,$rounds
	xor	$t0,$s0,$s0
	ld	[$key+16],$t0
	xor	$t1,$s1,$s1
	ld	[$key+20],$t1
	srl	$s0,21,$acc0			!
	xor	$t2,$s2,$s2
	ld	[$key+24],$t2
	xor	$t3,$s3,$s3
	and	$acc0,2040,$acc0
	ld	[$key+28],$t3
	srl	$s3,13,$acc1
	nop
.Ldec_loop:
	srl	$s2,5,$acc2			!
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$s1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	srl	$s1,21,$acc4
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2		!
	srl	$s0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$s3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4
	fmovs	%f0,%f0
	sll	$s2,3,$acc7			!
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$s2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	srl	$s1,13,$acc9
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7		!
	srl	$s0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$s3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9
	fmovs	%f0,%f0
	srl	$s3,21,$acc12			!
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$s2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	srl	$s1,5,$acc14
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12		!
	sll	$s0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	and	$acc15,2040,$acc15
	add	$key,32,$key
	ldx	[$tbl+$acc14],$acc14
	fmovs	%f0,%f0
	subcc	$rounds,1,$rounds		!
	ldx	[$tbl+$acc15],$acc15
	bz,a,pn	%icc,.Ldec_last
	add	$tbl,2048,$rounds

	srlx	$acc1,8,$acc1
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	fmovs	%f0,%f0
	srlx	$acc2,16,$acc2		!
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3			!
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	fmovs	%f0,%f0
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10		!
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15		!
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,21,$acc0
	xor	$acc14,$t3,$t3
	xor	$acc15,$t3,$t3
	srl	$t3,13,$acc1

	and	$acc0,2040,$acc0		!
	srl	$t2,5,$acc2
	and	$acc1,2040,$acc1
	ldx	[$tbl+$acc0],$acc0
	sll	$t1,3,$acc3
	and	$acc2,2040,$acc2
	ldx	[$tbl+$acc1],$acc1
	fmovs	%f0,%f0
	srl	$t1,21,$acc4			!
	and	$acc3,2040,$acc3
	ldx	[$tbl+$acc2],$acc2
	srl	$t0,13,$acc5
	and	$acc4,2040,$acc4
	ldx	[$tbl+$acc3],$acc3
	srl	$t3,5,$acc6
	and	$acc5,2040,$acc5
	ldx	[$tbl+$acc4],$acc4		!
	sll	$t2,3,$acc7
	and	$acc6,2040,$acc6
	ldx	[$tbl+$acc5],$acc5
	srl	$t2,21,$acc8
	and	$acc7,2040,$acc7
	ldx	[$tbl+$acc6],$acc6
	fmovs	%f0,%f0
	srl	$t1,13,$acc9			!
	and	$acc8,2040,$acc8
	ldx	[$tbl+$acc7],$acc7
	srl	$t0,5,$acc10
	and	$acc9,2040,$acc9
	ldx	[$tbl+$acc8],$acc8
	sll	$t3,3,$acc11
	and	$acc10,2040,$acc10
	ldx	[$tbl+$acc9],$acc9		!
	srl	$t3,21,$acc12
	and	$acc11,2040,$acc11
	ldx	[$tbl+$acc10],$acc10
	srl	$t2,13,$acc13
	and	$acc12,2040,$acc12
	ldx	[$tbl+$acc11],$acc11
	fmovs	%f0,%f0
	srl	$t1,5,$acc14			!
	and	$acc13,2040,$acc13
	ldx	[$tbl+$acc12],$acc12
	sll	$t0,3,$acc15
	and	$acc14,2040,$acc14
	ldx	[$tbl+$acc13],$acc13
	srlx	$acc1,8,$acc1
	and	$acc15,2040,$acc15
	ldx	[$tbl+$acc14],$acc14		!

	srlx	$acc2,16,$acc2
	xor	$acc0,$s0,$s0
	ldx	[$tbl+$acc15],$acc15
	srlx	$acc3,24,$acc3
	xor	$acc1,$s0,$s0
	ld	[$key+16],$t0
	fmovs	%f0,%f0
	srlx	$acc5,8,$acc5			!
	xor	$acc2,$s0,$s0
	ld	[$key+20],$t1
	srlx	$acc6,16,$acc6
	xor	$acc3,$s0,$s0
	ld	[$key+24],$t2
	srlx	$acc7,24,$acc7
	xor	$acc4,$s1,$s1
	ld	[$key+28],$t3			!
	srlx	$acc9,8,$acc9
	xor	$acc5,$s1,$s1
	ldx	[$tbl+2048+0],%g0		! prefetch td4
	srlx	$acc10,16,$acc10
	xor	$acc6,$s1,$s1
	ldx	[$tbl+2048+32],%g0		! prefetch td4
	srlx	$acc11,24,$acc11
	xor	$acc7,$s1,$s1
	ldx	[$tbl+2048+64],%g0		! prefetch td4
	srlx	$acc13,8,$acc13
	xor	$acc8,$s2,$s2
	ldx	[$tbl+2048+96],%g0		! prefetch td4
	srlx	$acc14,16,$acc14		!
	xor	$acc9,$s2,$s2
	ldx	[$tbl+2048+128],%g0		! prefetch td4
	srlx	$acc15,24,$acc15
	xor	$acc10,$s2,$s2
	ldx	[$tbl+2048+160],%g0		! prefetch td4
	srl	$s0,21,$acc0
	xor	$acc11,$s2,$s2
	ldx	[$tbl+2048+192],%g0		! prefetch td4
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	ldx	[$tbl+2048+224],%g0		! prefetch td4
	and	$acc0,2040,$acc0		!
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3
	ba	.Ldec_loop
	srl	$s3,13,$acc1

.align	32
.Ldec_last:
	srlx	$acc1,8,$acc1			!
	xor	$acc0,$t0,$t0
	ld	[$key+0],$s0
	srlx	$acc2,16,$acc2
	xor	$acc1,$t0,$t0
	ld	[$key+4],$s1
	srlx	$acc3,24,$acc3
	xor	$acc2,$t0,$t0
	ld	[$key+8],$s2			!
	srlx	$acc5,8,$acc5
	xor	$acc3,$t0,$t0
	ld	[$key+12],$s3
	srlx	$acc6,16,$acc6
	xor	$acc4,$t1,$t1
	srlx	$acc7,24,$acc7
	xor	$acc5,$t1,$t1
	srlx	$acc9,8,$acc9			!
	xor	$acc6,$t1,$t1
	srlx	$acc10,16,$acc10
	xor	$acc7,$t1,$t1
	srlx	$acc11,24,$acc11
	xor	$acc8,$t2,$t2
	srlx	$acc13,8,$acc13
	xor	$acc9,$t2,$t2
	srlx	$acc14,16,$acc14		!
	xor	$acc10,$t2,$t2
	srlx	$acc15,24,$acc15
	xor	$acc11,$t2,$t2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$t3,$t3
	srl	$t0,24,$acc0
	xor	$acc14,$t3,$t3
	xor	$acc15,$t3,$t3		!
	srl	$t3,16,$acc1

	srl	$t2,8,$acc2
	and	$acc1,255,$acc1
	ldub	[$rounds+$acc0],$acc0
	srl	$t1,24,$acc4
	and	$acc2,255,$acc2
	ldub	[$rounds+$acc1],$acc1
	srl	$t0,16,$acc5			!
	and	$t1,255,$acc3
	ldub	[$rounds+$acc2],$acc2
	ldub	[$rounds+$acc3],$acc3
	srl	$t3,8,$acc6
	and	$acc5,255,$acc5
	ldub	[$rounds+$acc4],$acc4
	fmovs	%f0,%f0
	srl	$t2,24,$acc8			!
	and	$acc6,255,$acc6
	ldub	[$rounds+$acc5],$acc5
	srl	$t1,16,$acc9
	and	$t2,255,$acc7
	ldub	[$rounds+$acc6],$acc6
	ldub	[$rounds+$acc7],$acc7
	fmovs	%f0,%f0
	srl	$t0,8,$acc10			!
	and	$acc9,255,$acc9
	ldub	[$rounds+$acc8],$acc8
	srl	$t3,24,$acc12
	and	$acc10,255,$acc10
	ldub	[$rounds+$acc9],$acc9
	srl	$t2,16,$acc13
	and	$t3,255,$acc11
	ldub	[$rounds+$acc10],$acc10	!
	srl	$t1,8,$acc14
	and	$acc13,255,$acc13
	ldub	[$rounds+$acc11],$acc11
	ldub	[$rounds+$acc12],$acc12
	and	$acc14,255,$acc14
	ldub	[$rounds+$acc13],$acc13
	and	$t0,255,$acc15
	ldub	[$rounds+$acc14],$acc14	!

	sll	$acc0,24,$acc0
	xor	$acc3,$s0,$s0
	ldub	[$rounds+$acc15],$acc15
	sll	$acc1,16,$acc1
	xor	$acc0,$s0,$s0
	ldx	[%sp+$bias+$frame+0],%i7	! restore return address
	fmovs	%f0,%f0
	sll	$acc2,8,$acc2			!
	xor	$acc1,$s0,$s0
	sll	$acc4,24,$acc4
	xor	$acc2,$s0,$s0
	sll	$acc5,16,$acc5
	xor	$acc7,$s1,$s1
	sll	$acc6,8,$acc6
	xor	$acc4,$s1,$s1
	sll	$acc8,24,$acc8			!
	xor	$acc5,$s1,$s1
	sll	$acc9,16,$acc9
	xor	$acc11,$s2,$s2
	sll	$acc10,8,$acc10
	xor	$acc6,$s1,$s1
	sll	$acc12,24,$acc12
	xor	$acc8,$s2,$s2
	sll	$acc13,16,$acc13		!
	xor	$acc9,$s2,$s2
	sll	$acc14,8,$acc14
	xor	$acc10,$s2,$s2
	xor	$acc12,$acc14,$acc14
	xor	$acc13,$s3,$s3
	xor	$acc14,$s3,$s3
	xor	$acc15,$s3,$s3

	ret
	restore
.type	_sparcv9_AES_decrypt,#function
.size	_sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)

.align	32
.globl	AES_decrypt
AES_decrypt:
	or	%o0,%o1,%g1
	andcc	%g1,3,%g0
	bnz,pn	%xcc,.Lunaligned_dec
	save	%sp,-$frame,%sp

	ld	[%i0+0],%o0
	ld	[%i0+4],%o1
	ld	[%i0+8],%o2
	ld	[%i0+12],%o3

1:	call	.+8
	add	%o7,AES_Td-1b,%o4
	call	_sparcv9_AES_decrypt
	mov	%i2,%o5

	st	%o0,[%i1+0]
	st	%o1,[%i1+4]
	st	%o2,[%i1+8]
	st	%o3,[%i1+12]

	ret
	restore

.align	32
.Lunaligned_dec:
	ldub	[%i0+0],%l0
	ldub	[%i0+1],%l1
	ldub	[%i0+2],%l2

	sll	%l0,24,%l0
	ldub	[%i0+3],%l3
	sll	%l1,16,%l1
	ldub	[%i0+4],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+5],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+6],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o0
	ldub	[%i0+7],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	ldub	[%i0+8],%l0
	or	%l7,%l6,%l6
	ldub	[%i0+9],%l1
	or	%l4,%l6,%o1
	ldub	[%i0+10],%l2

	sll	%l0,24,%l0
	ldub	[%i0+11],%l3
	sll	%l1,16,%l1
	ldub	[%i0+12],%l4
	sll	%l2,8,%l2
	or	%l1,%l0,%l0
	ldub	[%i0+13],%l5
	sll	%l4,24,%l4
	or	%l3,%l2,%l2
	ldub	[%i0+14],%l6
	sll	%l5,16,%l5
	or	%l0,%l2,%o2
	ldub	[%i0+15],%l7

	sll	%l6,8,%l6
	or	%l5,%l4,%l4
	or	%l7,%l6,%l6
	or	%l4,%l6,%o3

1:	call	.+8
	add	%o7,AES_Td-1b,%o4
	call	_sparcv9_AES_decrypt
	mov	%i2,%o5

	srl	%o0,24,%l0
	srl	%o0,16,%l1
	stb	%l0,[%i1+0]
	srl	%o0,8,%l2
	stb	%l1,[%i1+1]
	stb	%l2,[%i1+2]
	srl	%o1,24,%l4
	stb	%o0,[%i1+3]

	srl	%o1,16,%l5
	stb	%l4,[%i1+4]
	srl	%o1,8,%l6
	stb	%l5,[%i1+5]
	stb	%l6,[%i1+6]
	srl	%o2,24,%l0
	stb	%o1,[%i1+7]

	srl	%o2,16,%l1
	stb	%l0,[%i1+8]
	srl	%o2,8,%l2
	stb	%l1,[%i1+9]
	stb	%l2,[%i1+10]
	srl	%o3,24,%l4
	stb	%o2,[%i1+11]

	srl	%o3,16,%l5
	stb	%l4,[%i1+12]
	srl	%o3,8,%l6
	stb	%l5,[%i1+13]
	stb	%l6,[%i1+14]
	stb	%o3,[%i1+15]

	ret
	restore
.type	AES_decrypt,#function
.size	AES_decrypt,(.-AES_decrypt)
___

# fmovs instructions substituting for FP nops were originally added
# to meet specific instruction alignment requirements to maximize ILP.
# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
# undesired effect, so just omit them and sacrifice some portion of
# percent in performance...
$code =~ s/fmovs.*$//gm;

print $code;
close STDOUT or die "error closing STDOUT: $!";	# ensure flush