#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. October 2012. All rights reserved.
# ====================================================================

######################################################################
# Camellia for SPARC T4.
#
# As with AES below results [for aligned data] are virtually identical
# to critical path lengths for 3-cycle instruction latency:
#
#		128-bit key	192/256-
# CBC encrypt	4.14/4.21(*)	5.46/5.52
#			 (*) numbers after slash are for
#			     misaligned data.
#
# As with Intel AES-NI, question is if it's possible to improve
# performance of parallelizable modes by interleaving round
# instructions. In Camellia every instruction is dependent on
# previous, which means that there is place for 2 additional ones
# in between two dependent. Can we expect 3x performance improvement?
# At least one can argue that it should be possible to break 2x
# barrier... For some reason not even 2x appears to be possible:
#
#		128-bit key	192/256-
# CBC decrypt	2.21/2.74	2.99/3.40
# CTR		2.15/2.68(*)	2.93/3.34
#			 (*) numbers after slash are for
#			     misaligned data.
#
# This is for 2x interleave. But compared to 1x interleave CBC decrypt
# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
# So that out-of-order execution logic can take non-interleaved code
# to 1.87x, but can't take 2x interleaved one any further. There
# surely is some explanation... As result 3x interleave was not even
# attempted. Instead an effort was made to share specific modes
# implementations with AES module (therefore sparcv9_modes.pl).
#
# To anchor to something else, software C implementation processes
# one byte in 38 cycles with 128-bit key on same processor.

# Derive the directory this script lives in so that the shared perlasm
# helper can be located.  When $0 carries no directory component the
# match fails; fall back to "" explicitly instead of reading a stale $1.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, if any, names the output file; the
# generated assembly is written to STDOUT either way.  Fail loudly if the
# file cannot be opened rather than silently losing the output.
$output = pop;
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}

$::evp=1;	# if $evp is set to 0, script generates module with
# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
# entry points. These are fully compatible with openssl/camellia.h.

######################################################################
# single-round subroutines
#
# cmll_t4_encrypt and cmll_t4_decrypt process one 16-byte block.  Both
# accept misaligned input (two shifted 64-bit halves are merged from
# three loads) and misaligned output (partial stores via stda).  The
# number of "grand rounds" is read from offset 272 of the key schedule.
#
{
# Register aliases used by the heredoc below: %o0..%o5.
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

.text

.globl	cmll_t4_encrypt
.align	32
cmll_t4_encrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 272], $rounds	! grandRounds, 3 or 4
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	ldd		[$key + 32], %f16
	ldd		[$key + 40], %f18
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	ldd		[$key + 48], %f20
	ldd		[$key + 56], %f22
	sub		$rounds, 1, $rounds
	ldd		[$key + 64], %f24
	ldd		[$key + 72], %f26
	add		$key, 80, $key

.Lenc:
	camellia_f	%f12, %f2, %f0, %f2
	ldd		[$key + 0], %f12
	sub		$rounds,1,$rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd		[$key + 8], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd		[$key + 16], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd		[$key + 32], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 40], %f22
	camellia_fl	%f24, %f0, %f0
	ldd		[$key + 48], %f24
	camellia_fli	%f26, %f2, %f2
	ldd		[$key + 56], %f26
	brnz,pt		$rounds, .Lenc
	add		$key, 64, $key

	andcc		$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor		%f24, %f4, %f0
	fxor		%f26, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_encrypt,#function
.size	cmll_t4_encrypt,.-cmll_t4_encrypt

.globl	cmll_t4_decrypt
.align	32
cmll_t4_decrypt:
	ld		[$key + 272], $rounds	! grandRounds, 3 or 4
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	sll		$rounds, 6, $rounds
	add		$rounds, $key, $key

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %g4
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %g4, %o5
	srlx		$inp, %g4, %g4
	or		%o5, %o4, %o4
	or		%g4, %g1, %o5
1:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
	ldd		[$key - 8], %f12
	ldd		[$key - 16], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	ldd		[$key - 24], %f16
	ldd		[$key - 32], %f18
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	ldd		[$key - 40], %f20
	ldd		[$key - 48], %f22
	sub		$rounds, 64, $rounds
	ldd		[$key - 56], %f24
	ldd		[$key - 64], %f26
	sub		$key, 64, $key

.Ldec:
	camellia_f	%f12, %f2, %f0, %f2
	ldd		[$key - 8], %f12
	sub		$rounds, 64, $rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd		[$key - 16], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd		[$key - 24], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key - 32], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd		[$key - 40], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key - 48], %f22
	camellia_fl	%f24, %f0, %f0
	ldd		[$key - 56], %f24
	camellia_fli	%f26, %f2, %f2
	ldd		[$key - 64], %f26
	brnz,pt		$rounds, .Ldec
	sub		$key, 64, $key

	andcc		$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor		%f26, %f4, %f0
	fxor		%f24, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_decrypt,#function
.size	cmll_t4_decrypt,.-cmll_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
# ROTL128($rot) returns the instruction sequence that rotates the 128-bit
# value held in the register pair %o4:%o5 left by $rot bits, using %g4 and
# %g5 as scratch.  It is invoked from inside backticks in the heredoc
# below, i.e. evaluated when the generated text is post-processed, and its
# multi-line result replaces the backtick expression.
sub ROTL128 {
    my $rot = shift;

    return join("\n\t",
	"srlx\t%o4, 64-$rot, %g4",
	"sllx\t%o4, $rot, %o4",
	"srlx\t%o5, 64-$rot, %g5",
	"sllx\t%o5, $rot, %o5",
	"or\t%o4, %g5, %o4",
	"or\t%o5, %g4, %o5");
}

# Register aliases for cmll_t4_set_key: %o0..%o3.
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..3));
$code.=<<___;
.globl	cmll_t4_set_key
.align	32
cmll_t4_set_key:
	and		$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp		$bits, 192
	ldd		[$inp + 0], %f0
	bl,pt		%icc,.L128
	ldd		[$inp + 8], %f2

	be,pt		%icc,.L192
	ldd		[$inp + 16], %f4

	brz,pt		$tmp, .L256aligned
	ldd		[$inp + 24], %f6

	ldd		[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	b		.L256aligned
	faligndata	%f6, %f8, %f6

.align	16
.L192:
	brz,a,pt	$tmp, .L256aligned
	fnot2		%f4, %f6

	ldd		[$inp + 24], %f6
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	fnot2		%f4, %f6

.L256aligned:
	std		%f0, [$out + 0]		! k[0, 1]
	fsrc2		%f0, %f28
	std		%f2, [$out + 8]		! k[2, 3]
	fsrc2		%f2, %f30
	fxor		%f4, %f0, %f0
	b		.L128key
	fxor		%f6, %f2, %f2

.align	16
.L128:
	brz,pt		$tmp, .L128aligned
	nop

	ldd		[$inp + 16], %f4
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2

.L128aligned:
	std		%f0, [$out + 0]		! k[0, 1]
	fsrc2		%f0, %f28
	std		%f2, [$out + 8]		! k[2, 3]
	fsrc2		%f2, %f30

.L128key:
	mov		%o7, %o5
1:	call		.+8
	add		%o7, SIGMA-1b, %o4
	mov		%o5, %o7

	ldd		[%o4 + 0], %f16
	ldd		[%o4 + 8], %f18
	ldd		[%o4 + 16], %f20
	ldd		[%o4 + 24], %f22

	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	fxor		%f28, %f0, %f0
	fxor		%f30, %f2, %f2
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0

	bge,pn		%icc, .L256key
	nop
	std		%f0, [$out + 0x10]	! k[ 4, 5]
	std		%f2, [$out + 0x18]	! k[ 6, 7]

	movdtox		%f0, %o4
	movdtox		%f2, %o5
	`&ROTL128(15)`
	stx		%o4, [$out + 0x30]	! k[12, 13]
	stx		%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x40]	! k[16, 17]
	stx		%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x60]	! k[24, 25]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x70]	! k[28, 29]
	stx		%o5, [$out + 0x78]	! k[30, 31]
	`&ROTL128(34)`
	stx		%o4, [$out + 0xa0]	! k[40, 41]
	stx		%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(17)`
	stx		%o4, [$out + 0xc0]	! k[48, 49]
	stx		%o5, [$out + 0xc8]	! k[50, 51]

	movdtox		%f28, %o4		! k[ 0, 1]
	movdtox		%f30, %o5		! k[ 2, 3]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x20]	! k[ 8, 9]
	stx		%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(30)`
	stx		%o4, [$out + 0x50]	! k[20, 21]
	stx		%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(15)`
	stx		%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(17)`
	stx		%o4, [$out + 0x80]	! k[32, 33]
	stx		%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx		%o4, [$out + 0x90]	! k[36, 37]
	stx		%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(17)`
	stx		%o4, [$out + 0xb0]	! k[44, 45]
	stx		%o5, [$out + 0xb8]	! k[46, 47]

	mov		3, $tmp
	st		$tmp, [$out + 0x110]
	retl
	xor		%o0, %o0, %o0

.align	16
.L256key:
	ldd		[%o4 + 32], %f24
	ldd		[%o4 + 40], %f26

	std		%f0, [$out + 0x30]	! k[12, 13]
	std		%f2, [$out + 0x38]	! k[14, 15]

	fxor		%f4, %f0, %f0
	fxor		%f6, %f2, %f2
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0

	std		%f0, [$out + 0x10]	! k[ 4, 5]
	std		%f2, [$out + 0x18]	! k[ 6, 7]

	movdtox		%f0, %o4
	movdtox		%f2, %o5
	`&ROTL128(30)`
	stx		%o4, [$out + 0x50]	! k[20, 21]
	stx		%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(30)`
	stx		%o4, [$out + 0xa0]	! k[40, 41]
	stx		%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(51)`
	stx		%o4, [$out + 0x100]	! k[64, 65]
	stx		%o5, [$out + 0x108]	! k[66, 67]

	movdtox		%f4, %o4		! k[ 8, 9]
	movdtox		%f6, %o5		! k[10, 11]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x20]	! k[ 8, 9]
	stx		%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x40]	! k[16, 17]
	stx		%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(30)`
	stx		%o4, [$out + 0x90]	! k[36, 37]
	stx		%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(34)`
	stx		%o4, [$out + 0xd0]	! k[52, 53]
	stx		%o5, [$out + 0xd8]	! k[54, 55]
	ldx		[$out + 0x30], %o4	! k[12, 13]
	ldx		[$out + 0x38], %o5	! k[14, 15]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x30]	! k[12, 13]
	stx		%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(30)`
	stx		%o4, [$out + 0x70]	! k[28, 29]
	stx		%o5, [$out + 0x78]	! k[30, 31]
	srlx		%o4, 32, %g4
	srlx		%o5, 32, %g5
	st		%o4, [$out + 0xc0]	! k[48]
	st		%g5, [$out + 0xc4]	! k[49]
	st		%o5, [$out + 0xc8]	! k[50]
	st		%g4, [$out + 0xcc]	! k[51]
	`&ROTL128(49)`
	stx		%o4, [$out + 0xe0]	! k[56, 57]
	stx		%o5, [$out + 0xe8]	! k[58, 59]

	movdtox		%f28, %o4		! k[ 0, 1]
	movdtox		%f30, %o5		! k[ 2, 3]
	`&ROTL128(45)`
	stx		%o4, [$out + 0x60]	! k[24, 25]
	stx		%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(15)`
	stx		%o4, [$out + 0x80]	! k[32, 33]
	stx		%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx		%o4, [$out + 0xb0]	! k[44, 45]
	stx		%o5, [$out + 0xb8]	! k[46, 47]
	`&ROTL128(34)`
	stx		%o4, [$out + 0xf0]	! k[60, 61]
	stx		%o5, [$out + 0xf8]	! k[62, 63]

	mov		4, $tmp
	st		$tmp, [$out + 0x110]
	retl
	xor		%o0, %o0, %o0
.type	cmll_t4_set_key,#function
.size	cmll_t4_set_key,.-cmll_t4_set_key
.align	32
SIGMA:
	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
.type	SIGMA,#object
.size	SIGMA,.-SIGMA
.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
___
}

######################################################################
# Internal helpers for the mode implementations generated below via
# sparcv9_modes.pl: key-schedule loaders and 1x/2x block transforms,
# emitted under the _cmll128_* and _cmll256_* names.
#
{{{
# Register aliases for the helpers below (%i0..%i5 and %l1..%l5).
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..5));

# Encryption key loader: the first two 64-bit words go to %g4/%g5, the
# words at offsets 16..200 go to %f16..%f62 in ascending order.
$code.=<<___;
.align	32
_cmll128_load_enckey:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
___
for my $i (2 .. 25) {				# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_cmll128_load_enckey,#function
.size	_cmll128_load_enckey,.-_cmll128_load_enckey
_cmll256_load_enckey=_cmll128_load_enckey

.align	32
_cmll256_load_deckey:
	ldd		[$key + 64], %f62
	ldd		[$key + 72], %f60
	b		.Load_deckey
	add		$key, 64, $key
_cmll128_load_deckey:
	ldd		[$key + 0], %f60
	ldd		[$key + 8], %f62
.Load_deckey:
___
# Decryption key loader: the words at offsets 16..184 go to %f58..%f16,
# i.e. in descending register order; offsets 192/200 go to %g4/%g5.
for my $i (2 .. 23) {				# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`62-2*$i`
___
}
$code.=<<___;
	ldx		[$key + 192], %g4
	retl
	ldx		[$key + 200], %g5
.type	_cmll256_load_deckey,#function
.size	_cmll256_load_deckey,.-_cmll256_load_deckey

.align	32
_cmll128_encrypt_1x:
___
# Three groups of round instructions using %f16..%f54; the FL/FLI pair is
# emitted only after the first two groups, the third group is finished by
# the tail that follows the loop.
for my $i (0 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
___
    $code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f4
	camellia_f	%f58, %f0, %f4, %f2
	fxor		%f60, %f4, %f0
	retl
	fxor		%f62, %f2, %f2
.type	_cmll128_encrypt_1x,#function
.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
_cmll128_decrypt_1x=_cmll128_encrypt_1x

.align	32
_cmll128_encrypt_2x:
___
# Same structure as the 1x variant, with a second block held in %f4/%f6
# interleaved instruction by instruction.
for my $i (0 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
___
    $code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f8
	camellia_f	%f56, %f6, %f4, %f10
	camellia_f	%f58, %f0, %f8, %f2
	camellia_f	%f58, %f4, %f10, %f6
	fxor		%f60, %f8, %f0
	fxor		%f60, %f10, %f4
	fxor		%f62, %f2, %f2
	retl
	fxor		%f62, %f6, %f6
.type	_cmll128_encrypt_2x,#function
.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
_cmll128_decrypt_2x=_cmll128_encrypt_2x

.align	32
_cmll256_encrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd		[$key + 240], %f24
	ldd		[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd		[$key + 256], %f28
	ldd		[$key + 264], %f30
___
# The 256-bit variants reuse %f16..%f30 for the words at offsets 208..264
# (loaded above, as soon as each register has been consumed) and reload
# the words at offsets 16..72 into them before returning.
for my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 32], %f20
	ldd		[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd		[$key + 48], %f24
	ldd		[$key + 56], %f26
	fxor		%f28, %f4, %f0
	fxor		%f30, %f2, %f2
	ldd		[$key + 64], %f28
	retl
	ldd		[$key + 72], %f30
.type	_cmll256_encrypt_1x,#function
.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x

.align	32
_cmll256_encrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd		[$key + 240], %f24
	ldd		[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd		[$key + 256], %f28
	ldd		[$key + 264], %f30
___
for my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 32], %f20
	ldd		[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd		[$key + 48], %f24
	ldd		[$key + 56], %f26
	fxor		%f28, %f8, %f0
	fxor		%f28, %f10, %f4
	fxor		%f30, %f2, %f2
	fxor		%f30, %f6, %f6
	ldd		[$key + 64], %f28
	retl
	ldd		[$key + 72], %f30
.type	_cmll256_encrypt_2x,#function
.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x

.align	32
_cmll256_decrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key - 8], %f16
	ldd		[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key - 24], %f20
	ldd		[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd		[$key - 40], %f24
	ldd		[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd		[$key - 56], %f28
	ldd		[$key - 64], %f30
___
# Decryption mirrors the encrypt variant: %f16..%f30 are reused for the
# words at offsets -8..-64 and reloaded from offsets 184..128 on exit.
for my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd		[$key + 184], %f16
	ldd		[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd		[$key + 168], %f20
	ldd		[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd		[$key + 152], %f24
	ldd		[$key + 144], %f26
	fxor		%f30, %f4, %f0
	fxor		%f28, %f2, %f2
	ldd		[$key + 136], %f28
	retl
	ldd		[$key + 128], %f30
.type	_cmll256_decrypt_1x,#function
.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x

.align	32
_cmll256_decrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key - 8], %f16
	ldd		[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key - 24], %f20
	ldd		[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd		[$key - 40], %f24
	ldd		[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd		[$key - 56], %f28
	ldd		[$key - 64], %f30
___
for my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd		[$key + 184], %f16
	ldd		[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd		[$key + 168], %f20
	ldd		[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd		[$key + 152], %f24
	ldd		[$key + 144], %f26
	fxor		%f30, %f8, %f0
	fxor		%f30, %f10, %f4
	fxor		%f28, %f2, %f2
	fxor		%f28, %f6, %f6
	ldd		[$key + 136], %f28
	retl
	ldd		[$key + 128], %f30
.type	_cmll256_decrypt_2x,#function
.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
___

# Mode implementations are generated by the shared helper module; it
# appends to $::code and refers to the _cmll* helpers emitted above.
&alg_cbc_encrypt_implement("cmll",128);
&alg_cbc_encrypt_implement("cmll",256);

&alg_cbc_decrypt_implement("cmll",128);
&alg_cbc_decrypt_implement("cmll",256);

if ($::evp) {
    &alg_ctr32_implement("cmll",128);
    &alg_ctr32_implement("cmll",256);
}
}}}

# Non-EVP build: additionally export the Camellia_* entry points.
# Camellia_set_key validates its arguments before tail-branching to
# cmll_t4_set_key; Camellia_cbc_encrypt dispatches on the grandRounds
# value stored at offset 272 of the key schedule (3 selects the 128-bit
# routines) and on the $enc flag.
if (!$::evp) {
$code.=<<___;
.global	Camellia_encrypt
Camellia_encrypt=cmll_t4_encrypt
.global	Camellia_decrypt
Camellia_decrypt=cmll_t4_decrypt
.global	Camellia_set_key
.align	32
Camellia_set_key:
	andcc		%o2, 7, %g0		! double-check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		cmll_t4_set_key
	nop
1:	retl
	nop
.type	Camellia_set_key,#function
.size	Camellia_set_key,.-Camellia_set_key
___

my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl	Camellia_cbc_encrypt
.align	32
Camellia_cbc_encrypt:
	ld		[$key + 272], %g1
	nop
	brz		$enc, .Lcbc_decrypt
	cmp		%g1, 3

	be,pt		%icc, cmll128_t4_cbc_encrypt
	nop
	ba		cmll256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	be,pt		%icc, cmll128_t4_cbc_decrypt
	nop
	ba		cmll256_t4_cbc_decrypt
	nop
.type	Camellia_cbc_encrypt,#function
.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
___
}

# Post-process the accumulated text and print it to STDOUT.
&emit_assembler();

close STDOUT or die "error closing STDOUT: $!";