1! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. 2! 3! Licensed under the Apache License 2.0 (the "License"). You may not use 4! this file except in compliance with the License. You can obtain a copy 5! in the file LICENSE in the source distribution or at 6! https://www.openssl.org/source/license.html 7! 8! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S 9! 10! Global registers 1 to 5 are used. This is the same as done by the 11! cc compiler. The UltraSPARC load/store little endian feature is used. 12! 13! Instruction grouping often refers to one CPU cycle. 14! 15! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S 16! 17! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S 18! 19! Performance improvement according to './apps/openssl speed des' 20! 21! 32-bit build: 22! 23% faster than cc-5.2 -xarch=v8plus -xO5 23! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 24! 64-bit build: 25! 50% faster than cc-5.2 -xarch=v9 -xO5 26! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 27! 
.ident "des_enc.m4 2.1"
.file  "des_enc-sparc.S"

! Select the 64-bit ABI variant when the compiler reports a 64-bit SPARC target.

#if defined(__SUNPRO_C) && defined(__sparcv9)
# define ABI64  /* They've said -xarch=v9 at command line */
#elif defined(__GNUC__) && defined(__arch64__)
# define ABI64  /* They've said -m64 at command line */
#endif

! ABI dependent settings used by the functions further down:
!   FRAME         operand of the save instruction at each function entry
!   BIAS, ARG0    added to %sp to address the argument save slots
!   ARGSZ         size in bytes of one argument save slot
!   LDPTR, STPTR  pointer sized load and store mnemonics

#ifdef ABI64
  .register	%g2,#scratch
  .register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
# define	LDPTR	ldx
# define	STPTR	stx
# define	ARG0	128
# define	ARGSZ	8
#else
# define	FRAME	-96
# define	BIAS	0
# define	LDPTR	ld
# define	STPTR	st
# define	ARG0	68
# define	ARGSZ	4
#endif

! Value moved to the loop counter register out4 in DES_encrypt2.

#define LOOPS 7

! Symbolic register names.
! Note that local7 maps to %l6 and local6 maps to %l7.

#define global0 %g0
#define global1 %g1
#define global2 %g2
#define global3 %g3
#define global4 %g4
#define global5 %g5

#define local0 %l0
#define local1 %l1
#define local2 %l2
#define local3 %l3
#define local4 %l4
#define local5 %l5
#define local7 %l6
#define local6 %l7

#define in0 %i0
#define in1 %i1
#define in2 %i2
#define in3 %i3
#define in4 %i4
#define in5 %i5
#define in6 %i6
#define in7 %i7

#define out0 %o0
#define out1 %o1
#define out2 %o2
#define out3 %o3
#define out4 %o4
#define out5 %o5
#define out6 %o6
#define out7 %o7

! The store macros below write single bytes through this alias of stb.

#define stub stb

! From here on m4 quoting uses curly braces.

changequote({,})


! Macro definitions:


! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift, xor, and
! AND-mask technique.
!
! The macro also loads address sbox 1 to 5 to global 1 to 5, address
! sbox 6 to local6, and address sbox 8 to out3.
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
!
! implements the possibility to keep the halves in the original registers.
!
! parameter 1  left
! parameter 2  right
! parameter 3  result left (modify in first round)
! parameter 4  result right (use in first round)
! parameter 5  key address
! parameter 6  1/2 for include encryption/decryption
! parameter 7  1 for move in1 to in3
! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
! parameter 9  1 for load ks3 and ks2 to in4 and in3
!
! Registers read: out2 (base of the mask words at offsets 256 to 272,
! the loop counter word at offset 280 and the 0x0000FC00 word at
! offset 284) and global1 (address sbox 1). Both must be set up by the
! invoking code, since the sethi/or pair for global1 is replaced by nop.
!
! Registers written besides the two results: global2 to global5, local6
! and out3 (sbox addresses), out0 and out1 (first round key), out4 (loop
! counter) and the temporaries local1 to local5 and local7.
!
! With parameter 6 set to 1 or 2 the expansion ends with a call to the
! loop label .des_enc.1 or .des_dec.1, so the rounds run as a subroutine
! and the instruction in the delay slot prepares the first sbox 1 lookup.

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4	! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3
	srl	$1, 16, local4
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1	! sbox addr

	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4
	xor	$1, local1, $1

	! the sethi below and the or two lines later build 0x00FF00FF in local5
	sethi	%hi(16711680), local5
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4	! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5	! address sbox 5

	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4
	xor	$1, local1, $1

	ld	[$5], out0				! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2	! address sbox 2

	ld	[$5+4], out1				! key 8642
	and	local4, local7, local4
	add	global1, 512, global3	! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	! rotate both halves 3 left: sll by 3, srl by 29, then or

	sll	$1, 3, local3
	xor	$2, local1, $2

	sll	$2, 3, local2
	add	global1, 1280, local6	! address sbox 6

	srl	$1, 29, local4
	add	global1, 1792, out3	! address sbox 8

	srl	$2, 29, local1
	or	local4, local3, $4	! parameter 4 = parameter 1 rotated 3 left

	or	local2, local1, $3	! parameter 3 = parameter 2 rotated 3 left

	ifelse($6, 1, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1

		call .des_enc.1
		and	local1, 252, local1

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3
		xor	$4, out0, local1

		call .des_dec.1
		and	local1, 252, local1

	},{})
})


! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8
!
! Registers expected on entry: out0 and out1 hold the first round key,
! global1 to global5, local6 and out3 hold sbox addresses, out2 is the
! base for the byte lookups marked (and 0xFC) and for the words at
! offsets 280 and 284, and out4 holds the loop counter.
!
! On exit local3 and local4 hold parameter 1 shifted 3 right and 29 left,
! which is the form the final permutation macros expect. The condition
! codes of the compare of in2 to 8 are still valid after the expansion.
! When parameter 7 is given, the last xor sits in the delay slot of retl.

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5        ! 0x0000FC00
	ba	$4
	and	local1, 252, local1

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1            ! 8642
	xor	$2, out0, out0            ! 7531
	! fmovs	%f0, %f0                  ! fxor used for alignment

	srl	out1, 4, local0           ! rotate 4 right
	and	out0, local5, local3      ! 3
	! fmovs	%f0, %f0

	ld	[$5+$3*8], local7         ! key 7531 next round
	srl	local3, 8, local3         ! 3
	and	local0, 252, local2       ! 2
	! fmovs	%f0, %f0

	ld	[global3+local3],local3   ! 3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2  ! 2
	srl	out0, 24, local1          ! 7
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1     ! 7 (and 0xFC)
	srl	out1, 24, local0          ! 8
	and	out1, local5, local4      ! 4

	ldub	[out2+local0], local0     ! 8 (and 0xFC)
	srl	local4, 8, local4         ! 4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4   ! 4
	srl	out1, 16, local2          ! 6
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0      ! 8
	and	local2, 252, local2       ! 6
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2   ! 6
	srl	out0, 16, local3          ! 5
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1    ! 7
	and	local3, 252, local3       ! 5
	xor	$1, local0, $1            ! 8 finished

	ld	[global5+local3],local3   ! 5
	xor	$1, local2, $1            ! 6 finished
	subcc	out4, 1, out4             ! sets the condition tested by bne below

	ld	[$5+$3*8+4], out0         ! key 8642 next round
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	ld	[$5+$3*16+4], out1        ! key 8642 next round again
	and	local2, 252, local2       ! sbox5 next round
! next round
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2  ! 5
	srl	local7, 24, local3        ! 7
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3     ! 7 (and 0xFC)
	srl	out0, 4, local0           ! rotate 4 right
	and	local7, 252, local1       ! 1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4         ! 4
	and	local0, 252, local2       ! 2
	ld	[local5+local3], local3   ! 7

	srl	local0, 16, local5        ! 6
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2  ! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0          ! key 7531 next round
	and	local4, 252, local4	  ! 4

	and	local5, 252, local5	  ! 6
	ld	[global4+local4], local4  ! 4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0       ! 8
	ld	[local6+local5], local5   ! 6
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0     ! 8
	xor	$2, local4, $2            ! 4 finished

	and	local2, 252, local2       ! 3
	ld	[global1+local1], local1  ! 1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2  ! 3
	xor	$2, local0, $2            ! 8 finished
	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer (two rounds)

	ld	[out2+284], local5        ! 0x0000FC00
	xor	$2, out0, local4          ! sbox 1 next round
	xor	$2, local1, $2            ! 1 finished

	xor	$2, local2, $2            ! 3 finished
	bne	$4
	and	local4, 252, local1       ! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0           ! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7         ! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1            ! 6 finished
	cmp	in2, 8                    ! result is left for the invoking code

	ifelse($6,{}, {}, {ld	[out2+280], out4})  ! loop counter
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0           ! rotate
	and	local7, 252, local1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld	[$6], out0})  ! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3             ! first half of undoing the rotate 3 left
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld	[$6+4], out1}) ! key next encryption/decryption
	sll	$1, 29, local4            ! second half, combined by the final permutation
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2
})


! {fp_macro}
!
!  parameter 1   right (original left)
!  parameter 2   left (original right)
!  parameter 3   1 for optional store to [in0]
!  parameter 4   1 for load input/output address to local5/7
!
!  The final permutation logic switches the halves, meaning that
!  left and right end up in the registers originally used.
!
!  The macro expects parameter 1 split over local3 and local4, as left
!  there by the rounds. The five masks are built with sethi/or pairs.
!  local1 to local4 are clobbered.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1

	srl	local1, 4, local3
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2

	ifelse($3,1, {st $2, [in0+4]})

})


! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
!  parameter 1   original left
!  parameter 2   original right
!  parameter 3   left ip
!  parameter 4   right ip
!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
!                2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4
!
! The current block is taken from out5 and from local3/local4 (see the
! note in the body). The masks are read from out2 at offsets 256 to 272.
! out4 serves as a temporary and is reloaded from offset 280 at the end.

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2         ! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR	KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4          ! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR	KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})



! {load_little_endian}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! Reads eight single bytes, so the address needs no alignment. The bytes
! at offsets 0 to 3 form parameter 2 and those at offsets 4 to 7 form
! parameter 3, lowest offset in the least significant byte.
! The label and the label with an a appended mark start and end.

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_little_endian_inc}
!
! parameter 1  address
! parameter 2  destination left
! parameter 3  destination right
! parameter 4  temporary
! parameter 5  label
!
! adds 8 to address
!
! The address is incremented in the middle of the sequence, which is why
! the last three loads use offsets reduced by 8.

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})


! {load_n_bytes}
!
! Loads 1 to 7 bytes little endian
! Remaining bytes are zeroed.
!
! parameter 1  address
! parameter 2  length
! parameter 3  destination register left
! parameter 4  destination register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, the bytes at offsets 0 to 3 are assembled in parameter 4 and
! the bytes at offsets 4 to 6 in parameter 3.
!
! The call to .+8 leaves the address of the label with suffix .0 in %o7.
! The jump table holds offsets relative to that label, and length times 4
! selects the entry. Execution then falls through the remaining entry
! points down to suffix .1 and branches to the return label.
! Table entry 0 is the offset 0, which leads back to the call itself, so
! the length must be in the range 1 to 7.

define(load_n_bytes, {

! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5
	mov	0, $4

	ld	[$5], $5

	jmp	%o7+$5
	mov	0, $3

$7.7:
	ldub	[$1+6], $5
	sll	$5, 16, $5
	or	$3, $5, $3
$7.6:
	ldub	[$1+5], $5
	sll	$5, 8, $5
	or	$3, $5, $3
$7.5:
	ldub	[$1+4], $5
	or	$3, $5, $3
$7.4:
	ldub	[$1+3], $5
	sll	$5, 24, $5
	or	$4, $5, $4
$7.3:
	ldub	[$1+2], $5
	sll	$5, 16, $5
	or	$4, $5, $4
$7.2:
	ldub	[$1+1], $5
	sll	$5, 8, $5
	or	$4, $5, $4
$7.1:
	ldub	[$1+0], $5
	ba	$8
	or	$4, $5, $4

	.align 4

$7.jmp.table:
	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! {store_little_endian}
!
! parameter 1  address
! parameter 2  source left
! parameter 3  source right
! parameter 4  temporary
! parameter 5  label
!
! Writes eight single bytes, so the address needs no alignment.
! Parameter 2 goes to offsets 0 to 3 and parameter 3 to offsets 4 to 7,
! least significant byte first.

define(store_little_endian, {

! {store_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! rightmost in register to first in memory

$5:
	and	$2, 255, $4
	stub	$4, [$1+0]

	srl	$2, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1]

	srl	$2, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2]

	srl	$2, 24, $4
	stub	$4, [$1+3]


	and	$3, 255, $4
	stub	$4, [$1+0+4]

	srl	$3, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1+4]

	srl	$3, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2+4]

	srl	$3, 24, $4
	stub	$4, [$1+3+4]

$5a:

})


! {store_n_bytes}
!
! Stores 1 to 7 bytes little endian
!
! parameter 1  address
! parameter 2  length
! parameter 3  source register left
! parameter 4  source register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! As coded, parameter 4 supplies the bytes for offsets 0 to 3 and
! parameter 3 the bytes for offsets 4 to 6.
!
! Dispatch works through a jump table relative to the label with
! suffix .0, selected by length times 4. Table entry 0 is the offset 0,
! which leads back to the call itself, so the length must be 1 to 7.
! The last byte is stored in the delay slot of the branch to the
! return label.

define(store_n_bytes, {

! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8
	sll	$2, 2, $6

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5

	ld	[$5], $5

	jmp	%o7+$5
	nop

$7.7:
	srl	$3, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+6]
$7.6:
	srl	$3, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+5]
$7.5:
	and	$3, 0xff, $5
	stub	$5, [$1+4]
$7.4:
	srl	$4, 24, $5
	stub	$5, [$1+3]
$7.3:
	srl	$4, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+2]
$7.2:
	srl	$4, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+1]
$7.1:
	and	$4, 0xff, $5


	ba	$8
	stub	$5, [$1]

	.align 4

$7.jmp.table:

	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})


! Test helper: fills the registers given as parameters 1 to 8 and the
! working registers listed in the body with one fixed value.
! No invocation of it is visible in this part of the file.

define(testvalue,{1})

define(register_init, {

! For test purposes:

	sethi	%hi(testvalue), local0
	or	local0, %lo(testvalue), local0

	ifelse($1,{},{}, {mov	local0, $1})
	ifelse($2,{},{}, {mov	local0, $2})
	ifelse($3,{},{}, {mov	local0, $3})
	ifelse($4,{},{}, {mov	local0, $4})
	ifelse($5,{},{}, {mov	local0, $5})
	ifelse($6,{},{}, {mov	local0, $6})
	ifelse($7,{},{}, {mov	local0, $7})
	ifelse($8,{},{}, {mov	local0, $8})

	mov	local0, local1
	mov	local0, local2
	mov	local0, local3
	mov	local0, local4
	mov	local0, local5
	mov	local0, local7
	mov	local0, local6
	mov	local0, out0
	mov	local0, out1
	mov	local0, out2
	mov	local0, out3
	mov	local0, out4
	mov	local0, out5
	mov	local0, global1
	mov	local0, global2
	mov	local0, global3
	mov	local0, global4
	mov	local0, global5

})

.section	".text"

	.align 32

! Subroutine with the 16 rounds of one encryption. It contains no save
! instruction, so it works on the registers of its caller, and it
! returns with retl. The inner label .des_enc.1 is the loop entry that
! {ip_macro} calls directly.

.des_enc:

	! key address in3
	! loads key next encryption/decryption first round from [in4]

	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)


	.align 32

! Subroutine with the 16 rounds of one decryption. The key pointer steps
! backwards since parameter 3 is -1. Same calling rules as .des_enc.

.des_dec:

	! implemented with out5 as first parameter to avoid
	! register exchange in ede modes

	! key address in4
	! loads key next encryption/decryption first round from [in3]

	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)



! void DES_encrypt1(data, ks, enc)
! *******************************
!
! in0 is the data address (two 32-bit words, read here and rewritten by
! the store in the final permutation), in1 is the key schedule address
! and in2 is enc, where 0 selects the decryption path.

	.align 32
	.global DES_encrypt1
	.type	 DES_encrypt1,#function

DES_encrypt1:

	save	%sp, FRAME, %sp

	! Position independent address setup: the call leaves the address of
	! label 1 in %o7, the add in its delay slot turns the offset in global1
	! into the address of .PIC.DES_SPtrans, and out2 gets the address
	! of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	cmp	in2, 0                    ! enc

	be	.encrypt.dec
	ld	[in0+4], out5             ! right

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)

	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used

	fp_macro(in5, out5, 1)            ! 1 for store to [in0]

	ret
	restore

.encrypt.dec:

	add	in1, 120, in3             ! use last subkey for first round

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec,  ks in4

	fp_macro(out5, in5, 1)            ! 1 for store to [in0]

	ret
	restore

.DES_encrypt1.end:
	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1


! void DES_encrypt2(data, ks, enc)
!*********************************

	!
	! encrypts/decrypts without initial/final permutation
	!
	! in0 is the data address, in1 the key schedule address and in2 is
	! enc, where 0 selects the decryption path. The halves are rotated
	! 3 left before the rounds and 3 right afterwards.

	.align 32
	.global DES_encrypt2
	.type	 DES_encrypt2,#function

DES_encrypt2:

	save	%sp, FRAME, %sp

	! Position independent address setup: global1 becomes the address
	! of .PIC.DES_SPtrans and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	! Set sbox address 1 to 6 and rotate halves 3 left
	! Errors caught by destest? Yes. Still? *NO*

	!sethi	%hi(DES_SPtrans), global1 ! address sbox 1

	!or	global1, %lo(DES_SPtrans), global1  ! sbox 1

	add	global1, 256, global2     ! sbox 2
	add	global1, 512, global3     ! sbox 3

	ld	[in0], out5               ! right
	add	global1, 768, global4     ! sbox 4
	add	global1, 1024, global5    ! sbox 5

	ld	[in0+4], in5              ! left
	add	global1, 1280, local6     ! sbox 6
	add	global1, 1792, out3       ! sbox 8

	! rotate

	sll	in5, 3, local5
	mov	in1, in3                  ! key address to in3

	sll	out5, 3, local7
	srl	in5, 29, in5

	srl	out5, 29, out5
	add	in5, local5, in5

	add	out5, local7, out5
	cmp	in2, 0

	! we use our own stackframe
	! (the data address is saved there because in0 and in1 are reused
	! as temporaries in the rotate after the rounds)

	be	.encrypt2.dec
	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]

	ld	[in3], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in3+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	call .des_enc
	mov	in3, in4

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	in5, [in0]
	st	out5, [in0+4]

	ret
	restore


.encrypt2.dec:

	add in3, 120, in4                 ! last subkey is used first

	ld	[in4], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in4+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	mov	in5, local1               ! left expected in out5
	mov	out5, in5

	call .des_dec
	mov	local1, out5

.encrypt2.finish:

	! rotate
	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	out5, [in0]
	st	in5, [in0+4]

	ret
	restore

.DES_encrypt2.end:
	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2


! void DES_encrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 is the data address, in1 to in3 are the three key schedule
! addresses. The sequence below is: rounds with ks1 through the call
! inside the initial permutation macro, then .des_dec, then .des_enc,
! then the final permutation with store to [in0].

	.align 32
	.global DES_encrypt3
	.type	 DES_encrypt3,#function

DES_encrypt3:

	save	%sp, FRAME, %sp

	! Position independent address setup: global1 becomes the address
	! of .PIC.DES_SPtrans and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	add	in2, 120, in4             ! ks2

	ld	[in0+4], out5             ! right
	mov	in3, in2                  ! save ks3

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)

	call	.des_dec
	mov	in2, in3                  ! preload ks3

	call	.des_enc
	nop

	fp_macro(in5, out5, 1)

	ret
	restore

.DES_encrypt3.end:
	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3


! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
!
! in0 is the data address, in1 to in3 are the three key schedule
! addresses. The sequence below is: decryption rounds starting at
! ks3 + 120 through the call inside the initial permutation macro,
! then .des_enc, then .des_dec, then the final permutation with store
! to [in0].

	.align 32
	.global DES_decrypt3
	.type	 DES_decrypt3,#function

DES_decrypt3:

	save	%sp, FRAME, %sp

	! Position independent address setup: global1 becomes the address
	! of .PIC.DES_SPtrans and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	ld	[in0], in5                ! left
	add	in3, 120, in4             ! ks3

	ld	[in0+4], out5             ! right
	mov	in2, in3                  ! ks2

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)

	call	.des_enc
	add	in1, 120, in4             ! preload ks1

	call	.des_dec
	nop

	fp_macro(out5, in5, 1)

	ret
	restore

.DES_decrypt3.end:
	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3

! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************
!
! Register arguments: in0 input address, in1 output address, in2 length
! in bytes, in3 key schedule address, in4 ivec address, in5 enc where 0
! selects the decryption path. The ivec address is kept in a stack slot
! and the final chaining value is written back through it, except when
! decryption is asked for with a length of zero or less.


	.align 32
	.global DES_ncbc_encrypt
	.type	 DES_ncbc_encrypt,#function

DES_ncbc_encrypt:

	save	%sp, FRAME, %sp

	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })

	! Position independent address setup: global1 becomes the address
	! of .PIC.DES_SPtrans and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	cmp	in5, 0                    ! enc

	be	.ncbc.dec
	STPTR	in4, IVEC

	! addr  left  right  temp  label
	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv

	addcc	in2, -8, in2              ! bytes missing when first block done

	bl	.ncbc.enc.seven.or.less
	mov	in3, in4                  ! schedule

.ncbc.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block

.ncbc.enc.next.block_1:

	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)

.ncbc.enc.next.block_2:

!//	call .des_enc                     ! compares in2 to 8
!	rounds inlined for alignment purposes

	add	global1, 768, global4     ! address sbox 4 since register used below

	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4)  ! include encryption  ks in3

	! the branch tests the compare of in2 to 8 done inside the rounds

	bl	.ncbc.enc.next.block_fp
	add	in0, 8, in0               ! input address

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4,  local5, .LLE12)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 2)

	store_little_endian(in1, out0, out1, local3, .SLE10)  ! block

	ld	[in3], out0               ! key 7531 first round next block
	mov	in5, local1
	xor	global3, out5, in5        ! iv xor next block

	ld	[in3+4], out1             ! key 8642
	add	global1, 512, global3     ! address sbox 3 since register used
	xor	global4, local1, out5     ! iv xor next block

	ba	.ncbc.enc.next.block_2
	add	in1, 8, in1               ! output address

.ncbc.enc.next.block_fp:

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE1)  ! block

	addcc   in2, -8, in2              ! bytes missing when next block done

	bpos	.ncbc.enc.next.block
	add	in1, 8, in1

.ncbc.enc.seven.or.less:

	cmp	in2, -8

	ble	.ncbc.enc.finish
	nop

	add	in2, 8, local1            ! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)

	! Loads 1 to 7 bytes little endian to global4, out4


.ncbc.enc.finish:

	LDPTR	IVEC, local4
	store_little_endian(local4, in5, out5, local5, .SLE2)  ! ivec

	ret
	restore


.ncbc.dec:

	STPTR	in0, INPUT
	cmp	in2, 0                    ! length
	add	in3, 120, in3

	LDPTR	IVEC, local7              ! ivec
	ble	.ncbc.dec.finish
	mov	in3, in4                  ! schedule

	STPTR	in1, OUTPUT
	mov	in0, local5               ! input

	load_little_endian(local7, in0, in1, local3, .LLE3)   ! ivec

.ncbc.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE4)  ! block

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption  ks in4

	fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4           ! iv xor
	bl	.ncbc.dec.seven.or.less
	xor	in5, in1, global4         ! iv xor

	! Load ivec next block now, since input and output address might be the same.

	load_little_endian_inc(local5, in0, in1, local3, .LLE5)  ! iv

	store_little_endian(local7, out4, global4, local3, .SLE3)

	STPTR	local5, INPUT
	add	local7, 8, local7
	addcc   in2, -8, in2

	bg	.ncbc.dec.next.block
	STPTR	local7, OUTPUT


.ncbc.dec.store.iv:

	LDPTR	IVEC, local4              ! ivec
	store_little_endian(local4, in0, in1, local5, .SLE4)

.ncbc.dec.finish:

	ret
	restore

.ncbc.dec.seven.or.less:

	load_little_endian_inc(local5, in0, in1, local3, .LLE13)     ! ivec

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)


.DES_ncbc_encrypt.end:
	.size	 DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt


! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
!
! **************************************************************************
!
! Triple DES (encrypt, decrypt, encrypt) in CBC mode.
!
! Arguments after the register window is opened:
!   in0  input address
!   in1  output address
!   in2  length in bytes
!   in3  ks1
!   in4  ks2
!   in5  ks3
!   ivec and enc are the 7th and 8th arguments and are read from the
!   caller frame through %fp; enc equal to zero selects the decrypt path.
!
! Encrypt runs .des_enc with ks1, .des_dec with ks2, .des_enc with ks3.
! Decrypt runs .des_dec with ks3, .des_enc with ks2, .des_dec with ks1.
!
! The {INPUT} and {OUTPUT} stack slot macros used on the decrypt path
! are the ones set up inside DES_ncbc_encrypt earlier in this file.
!
! The instruction order is hand scheduled. The instruction written
! right after a branch, call or ret occupies its delay slot.


	.align 32
	.global DES_ede3_cbc_encrypt
	.type	 DES_ede3_cbc_encrypt,#function

DES_ede3_cbc_encrypt:

	save	%sp, FRAME, %sp

	! Stack slots that hold the three key schedule pointers.
	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })

	! Position independent addressing: the call at label 1 leaves its own
	! address in %o7, so adding the assembled distance gives the run-time
	! address of DES_SPtrans in global1. out2 then receives .des_and.
	sethi	%hi(.PIC.DES_SPtrans-1f),global1
	or	global1,%lo(.PIC.DES_SPtrans-1f),global1
1:	call	.+8
	add	%o7,global1,global1
	sub	global1,.PIC.DES_SPtrans-.des_and,out2

	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	cmp	local3, 0                                ! enc

	be	.ede3.dec
	STPTR	in4, KS2			! delay slot, runs on both paths

	! -------- encrypt path --------
	! in5/out5 carry the chaining value, starting with the iv.

	STPTR	in5, KS3

	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec

	addcc	in2, -8, in2			! bytes missing after next block

	bl	.ede3.enc.seven.or.less
	STPTR	in3, KS1

.ede3.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE7)

.ede3.enc.next.block_1:

	LDPTR	KS2, in4
	xor	in5, out4, in5			! iv xor
	xor	out5, global4, out5		! iv xor

	LDPTR	KS1, in3
	add	in4, 120, in4			! for decryption we use last subkey first
	nop

	ip_macro(in5, out5, in5, out5, in3)

.ede3.enc.next.block_2:

	call	.des_enc			! ks1 in3
	nop

	call	.des_dec			! ks2 in4
	LDPTR	KS3, in3

	call	.des_enc			! ks3 in3  compares in2 to 8
	nop

	bl	.ede3.enc.next.block_fp
	add	in0, 8, in0

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE11)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 1)

	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block

	mov	in5, local1
	xor	global3, out5, in5		! iv xor next block

	ld	[in3], out0			! key 7531
	add	global1, 512, global3		! address sbox 3
	xor	global4, local1, out5		! iv xor next block

	ld	[in3+4], out1			! key 8642
	add	global1, 768, global4		! address sbox 4
	ba	.ede3.enc.next.block_2
	add	in1, 8, in1

.ede3.enc.next.block_fp:

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block

	addcc   in2, -8, in2			! bytes missing when next block done

	bpos	.ede3.enc.next.block
	add	in1, 8, in1

.ede3.enc.seven.or.less:

	! in2 is the remaining byte count minus 8 here; -8 or less
	! means that nothing is left to encrypt.
	cmp	in2, -8

	ble	.ede3.enc.finish
	nop

	add	in2, 8, local1			! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)

.ede3.enc.finish:

	! Write the chaining value in5/out5 back to the caller ivec buffer.
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4		! ivec
	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec

	ret
	restore

	! -------- decrypt path --------
	! in0/in1 carry the chaining value, local5 the input address and
	! local7 the output address once the first block has been processed.
	! ks1 and ks3 are stored advanced by 120 because they are used for
	! decryption; ks2 is stored unchanged.

.ede3.dec:

	STPTR	in0, INPUT
	add	in5, 120, in5

	STPTR	in1, OUTPUT
	mov	in0, local5
	add	in3, 120, in3

	STPTR	in3, KS1
	cmp	in2, 0

	ble	.ede3.dec.finish		! nothing to do for length <= 0
	STPTR	in5, KS3

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7		! iv
	load_little_endian(local7, in0, in1, local3, .LLE8)

.ede3.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE9)

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4

	call	.des_enc			! ks2 in3
	LDPTR	KS1, in4

	call	.des_dec			! ks1 in4
	nop

	fp_macro(out5, in5, 0, 1)   ! 1 for input and output address local5/7

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4			! iv xor
	bl	.ede3.dec.seven.or.less
	xor	in5, in1, global4		! iv xor

	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block

	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block

	STPTR	local5, INPUT
	addcc   in2, -8, in2
	add	local7, 8, local7

	bg	.ede3.dec.next.block
	STPTR	local7, OUTPUT

.ede3.dec.store.iv:

	! Write the chaining value in0/in1 back to the caller ivec buffer.
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4		! ivec
	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec

.ede3.dec.finish:

	ret
	restore

.ede3.dec.seven.or.less:

	! Final short block: fetch the next chaining value first, then store
	! only in2 bytes and continue at .ede3.dec.store.iv.
	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)


.DES_ede3_cbc_encrypt.end:
	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt

	! The object that follows is 256 table bytes plus eight 4-byte words.
	! Its last word starts at offset 284, so the object is 288 bytes long.
	.align 256
	.type	 .des_and,#object
	.size	 .des_and,288

.des_and:

! This table is used for AND 0xFC when it is known that register
! bits 8-31 are zero. Makes it possible to do three arithmetic
! operations in one cycle.
1778 1779 .byte 0, 0, 0, 0, 4, 4, 4, 4 1780 .byte 8, 8, 8, 8, 12, 12, 12, 12 1781 .byte 16, 16, 16, 16, 20, 20, 20, 20 1782 .byte 24, 24, 24, 24, 28, 28, 28, 28 1783 .byte 32, 32, 32, 32, 36, 36, 36, 36 1784 .byte 40, 40, 40, 40, 44, 44, 44, 44 1785 .byte 48, 48, 48, 48, 52, 52, 52, 52 1786 .byte 56, 56, 56, 56, 60, 60, 60, 60 1787 .byte 64, 64, 64, 64, 68, 68, 68, 68 1788 .byte 72, 72, 72, 72, 76, 76, 76, 76 1789 .byte 80, 80, 80, 80, 84, 84, 84, 84 1790 .byte 88, 88, 88, 88, 92, 92, 92, 92 1791 .byte 96, 96, 96, 96, 100, 100, 100, 100 1792 .byte 104, 104, 104, 104, 108, 108, 108, 108 1793 .byte 112, 112, 112, 112, 116, 116, 116, 116 1794 .byte 120, 120, 120, 120, 124, 124, 124, 124 1795 .byte 128, 128, 128, 128, 132, 132, 132, 132 1796 .byte 136, 136, 136, 136, 140, 140, 140, 140 1797 .byte 144, 144, 144, 144, 148, 148, 148, 148 1798 .byte 152, 152, 152, 152, 156, 156, 156, 156 1799 .byte 160, 160, 160, 160, 164, 164, 164, 164 1800 .byte 168, 168, 168, 168, 172, 172, 172, 172 1801 .byte 176, 176, 176, 176, 180, 180, 180, 180 1802 .byte 184, 184, 184, 184, 188, 188, 188, 188 1803 .byte 192, 192, 192, 192, 196, 196, 196, 196 1804 .byte 200, 200, 200, 200, 204, 204, 204, 204 1805 .byte 208, 208, 208, 208, 212, 212, 212, 212 1806 .byte 216, 216, 216, 216, 220, 220, 220, 220 1807 .byte 224, 224, 224, 224, 228, 228, 228, 228 1808 .byte 232, 232, 232, 232, 236, 236, 236, 236 1809 .byte 240, 240, 240, 240, 244, 244, 244, 244 1810 .byte 248, 248, 248, 248, 252, 252, 252, 252 1811 1812 ! 5 numbers for initial/final permutation 1813 1814 .word 0x0f0f0f0f ! offset 256 1815 .word 0x0000ffff ! 260 1816 .word 0x33333333 ! 264 1817 .word 0x00ff00ff ! 268 1818 .word 0x55555555 ! 272 1819 1820 .word 0 ! 276 1821 .word LOOPS ! 280 1822 .word 0x0000FC00 ! 284 1823 1824 .global DES_SPtrans 1825 .type DES_SPtrans,#object 1826 .size DES_SPtrans,2048 1827.align 64 1828DES_SPtrans: 1829.PIC.DES_SPtrans: 1830 ! 
nibble 0 1831 .word 0x02080800, 0x00080000, 0x02000002, 0x02080802 1832 .word 0x02000000, 0x00080802, 0x00080002, 0x02000002 1833 .word 0x00080802, 0x02080800, 0x02080000, 0x00000802 1834 .word 0x02000802, 0x02000000, 0x00000000, 0x00080002 1835 .word 0x00080000, 0x00000002, 0x02000800, 0x00080800 1836 .word 0x02080802, 0x02080000, 0x00000802, 0x02000800 1837 .word 0x00000002, 0x00000800, 0x00080800, 0x02080002 1838 .word 0x00000800, 0x02000802, 0x02080002, 0x00000000 1839 .word 0x00000000, 0x02080802, 0x02000800, 0x00080002 1840 .word 0x02080800, 0x00080000, 0x00000802, 0x02000800 1841 .word 0x02080002, 0x00000800, 0x00080800, 0x02000002 1842 .word 0x00080802, 0x00000002, 0x02000002, 0x02080000 1843 .word 0x02080802, 0x00080800, 0x02080000, 0x02000802 1844 .word 0x02000000, 0x00000802, 0x00080002, 0x00000000 1845 .word 0x00080000, 0x02000000, 0x02000802, 0x02080800 1846 .word 0x00000002, 0x02080002, 0x00000800, 0x00080802 1847 ! nibble 1 1848 .word 0x40108010, 0x00000000, 0x00108000, 0x40100000 1849 .word 0x40000010, 0x00008010, 0x40008000, 0x00108000 1850 .word 0x00008000, 0x40100010, 0x00000010, 0x40008000 1851 .word 0x00100010, 0x40108000, 0x40100000, 0x00000010 1852 .word 0x00100000, 0x40008010, 0x40100010, 0x00008000 1853 .word 0x00108010, 0x40000000, 0x00000000, 0x00100010 1854 .word 0x40008010, 0x00108010, 0x40108000, 0x40000010 1855 .word 0x40000000, 0x00100000, 0x00008010, 0x40108010 1856 .word 0x00100010, 0x40108000, 0x40008000, 0x00108010 1857 .word 0x40108010, 0x00100010, 0x40000010, 0x00000000 1858 .word 0x40000000, 0x00008010, 0x00100000, 0x40100010 1859 .word 0x00008000, 0x40000000, 0x00108010, 0x40008010 1860 .word 0x40108000, 0x00008000, 0x00000000, 0x40000010 1861 .word 0x00000010, 0x40108010, 0x00108000, 0x40100000 1862 .word 0x40100010, 0x00100000, 0x00008010, 0x40008000 1863 .word 0x40008010, 0x00000010, 0x40100000, 0x00108000 1864 ! 
nibble 2 1865 .word 0x04000001, 0x04040100, 0x00000100, 0x04000101 1866 .word 0x00040001, 0x04000000, 0x04000101, 0x00040100 1867 .word 0x04000100, 0x00040000, 0x04040000, 0x00000001 1868 .word 0x04040101, 0x00000101, 0x00000001, 0x04040001 1869 .word 0x00000000, 0x00040001, 0x04040100, 0x00000100 1870 .word 0x00000101, 0x04040101, 0x00040000, 0x04000001 1871 .word 0x04040001, 0x04000100, 0x00040101, 0x04040000 1872 .word 0x00040100, 0x00000000, 0x04000000, 0x00040101 1873 .word 0x04040100, 0x00000100, 0x00000001, 0x00040000 1874 .word 0x00000101, 0x00040001, 0x04040000, 0x04000101 1875 .word 0x00000000, 0x04040100, 0x00040100, 0x04040001 1876 .word 0x00040001, 0x04000000, 0x04040101, 0x00000001 1877 .word 0x00040101, 0x04000001, 0x04000000, 0x04040101 1878 .word 0x00040000, 0x04000100, 0x04000101, 0x00040100 1879 .word 0x04000100, 0x00000000, 0x04040001, 0x00000101 1880 .word 0x04000001, 0x00040101, 0x00000100, 0x04040000 1881 ! nibble 3 1882 .word 0x00401008, 0x10001000, 0x00000008, 0x10401008 1883 .word 0x00000000, 0x10400000, 0x10001008, 0x00400008 1884 .word 0x10401000, 0x10000008, 0x10000000, 0x00001008 1885 .word 0x10000008, 0x00401008, 0x00400000, 0x10000000 1886 .word 0x10400008, 0x00401000, 0x00001000, 0x00000008 1887 .word 0x00401000, 0x10001008, 0x10400000, 0x00001000 1888 .word 0x00001008, 0x00000000, 0x00400008, 0x10401000 1889 .word 0x10001000, 0x10400008, 0x10401008, 0x00400000 1890 .word 0x10400008, 0x00001008, 0x00400000, 0x10000008 1891 .word 0x00401000, 0x10001000, 0x00000008, 0x10400000 1892 .word 0x10001008, 0x00000000, 0x00001000, 0x00400008 1893 .word 0x00000000, 0x10400008, 0x10401000, 0x00001000 1894 .word 0x10000000, 0x10401008, 0x00401008, 0x00400000 1895 .word 0x10401008, 0x00000008, 0x10001000, 0x00401008 1896 .word 0x00400008, 0x00401000, 0x10400000, 0x10001008 1897 .word 0x00001008, 0x10000000, 0x10000008, 0x10401000 1898 ! 
nibble 4 1899 .word 0x08000000, 0x00010000, 0x00000400, 0x08010420 1900 .word 0x08010020, 0x08000400, 0x00010420, 0x08010000 1901 .word 0x00010000, 0x00000020, 0x08000020, 0x00010400 1902 .word 0x08000420, 0x08010020, 0x08010400, 0x00000000 1903 .word 0x00010400, 0x08000000, 0x00010020, 0x00000420 1904 .word 0x08000400, 0x00010420, 0x00000000, 0x08000020 1905 .word 0x00000020, 0x08000420, 0x08010420, 0x00010020 1906 .word 0x08010000, 0x00000400, 0x00000420, 0x08010400 1907 .word 0x08010400, 0x08000420, 0x00010020, 0x08010000 1908 .word 0x00010000, 0x00000020, 0x08000020, 0x08000400 1909 .word 0x08000000, 0x00010400, 0x08010420, 0x00000000 1910 .word 0x00010420, 0x08000000, 0x00000400, 0x00010020 1911 .word 0x08000420, 0x00000400, 0x00000000, 0x08010420 1912 .word 0x08010020, 0x08010400, 0x00000420, 0x00010000 1913 .word 0x00010400, 0x08010020, 0x08000400, 0x00000420 1914 .word 0x00000020, 0x00010420, 0x08010000, 0x08000020 1915 ! nibble 5 1916 .word 0x80000040, 0x00200040, 0x00000000, 0x80202000 1917 .word 0x00200040, 0x00002000, 0x80002040, 0x00200000 1918 .word 0x00002040, 0x80202040, 0x00202000, 0x80000000 1919 .word 0x80002000, 0x80000040, 0x80200000, 0x00202040 1920 .word 0x00200000, 0x80002040, 0x80200040, 0x00000000 1921 .word 0x00002000, 0x00000040, 0x80202000, 0x80200040 1922 .word 0x80202040, 0x80200000, 0x80000000, 0x00002040 1923 .word 0x00000040, 0x00202000, 0x00202040, 0x80002000 1924 .word 0x00002040, 0x80000000, 0x80002000, 0x00202040 1925 .word 0x80202000, 0x00200040, 0x00000000, 0x80002000 1926 .word 0x80000000, 0x00002000, 0x80200040, 0x00200000 1927 .word 0x00200040, 0x80202040, 0x00202000, 0x00000040 1928 .word 0x80202040, 0x00202000, 0x00200000, 0x80002040 1929 .word 0x80000040, 0x80200000, 0x00202040, 0x00000000 1930 .word 0x00002000, 0x80000040, 0x80002040, 0x80202000 1931 .word 0x80200000, 0x00002040, 0x00000040, 0x80200040 1932 ! 
nibble 6 1933 .word 0x00004000, 0x00000200, 0x01000200, 0x01000004 1934 .word 0x01004204, 0x00004004, 0x00004200, 0x00000000 1935 .word 0x01000000, 0x01000204, 0x00000204, 0x01004000 1936 .word 0x00000004, 0x01004200, 0x01004000, 0x00000204 1937 .word 0x01000204, 0x00004000, 0x00004004, 0x01004204 1938 .word 0x00000000, 0x01000200, 0x01000004, 0x00004200 1939 .word 0x01004004, 0x00004204, 0x01004200, 0x00000004 1940 .word 0x00004204, 0x01004004, 0x00000200, 0x01000000 1941 .word 0x00004204, 0x01004000, 0x01004004, 0x00000204 1942 .word 0x00004000, 0x00000200, 0x01000000, 0x01004004 1943 .word 0x01000204, 0x00004204, 0x00004200, 0x00000000 1944 .word 0x00000200, 0x01000004, 0x00000004, 0x01000200 1945 .word 0x00000000, 0x01000204, 0x01000200, 0x00004200 1946 .word 0x00000204, 0x00004000, 0x01004204, 0x01000000 1947 .word 0x01004200, 0x00000004, 0x00004004, 0x01004204 1948 .word 0x01000004, 0x01004200, 0x01004000, 0x00004004 1949 ! nibble 7 1950 .word 0x20800080, 0x20820000, 0x00020080, 0x00000000 1951 .word 0x20020000, 0x00800080, 0x20800000, 0x20820080 1952 .word 0x00000080, 0x20000000, 0x00820000, 0x00020080 1953 .word 0x00820080, 0x20020080, 0x20000080, 0x20800000 1954 .word 0x00020000, 0x00820080, 0x00800080, 0x20020000 1955 .word 0x20820080, 0x20000080, 0x00000000, 0x00820000 1956 .word 0x20000000, 0x00800000, 0x20020080, 0x20800080 1957 .word 0x00800000, 0x00020000, 0x20820000, 0x00000080 1958 .word 0x00800000, 0x00020000, 0x20000080, 0x20820080 1959 .word 0x00020080, 0x20000000, 0x00000000, 0x00820000 1960 .word 0x20800080, 0x20020080, 0x20020000, 0x00800080 1961 .word 0x20820000, 0x00000080, 0x00800080, 0x20020000 1962 .word 0x20820080, 0x00800000, 0x20800000, 0x20000080 1963 .word 0x00820000, 0x00020080, 0x20020080, 0x20800000 1964 .word 0x00000080, 0x20820000, 0x00820080, 0x00000000 1965 .word 0x20000000, 0x20800080, 0x00020000, 0x00820080 1966 1967