#! /usr/bin/env perl
# Copyright 2014-2022 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2021- IBM Inc. All rights reserved
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
#===================================================================================
# Written by Danny Tsen <dtsen@us.ibm.com> for OpenSSL Project,
#
# GHASH is based on the Karatsuba multiplication method.
#
#    Xi xor X1
#
#    X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
#      (X1.h * H4.h + xX.l * H4.l + X1 * H4) +
#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
#      (X4.h * H.h + X4.l * H.l + X4 * H)
#
# Xi = v0
# H Poly = v2
# Hash keys = v3 - v14
#     ( H.l, H, H.h)
#     ( H^2.l, H^2, H^2.h)
#     ( H^3.l, H^3, H^3.h)
#     ( H^4.l, H^4, H^4.h)
#
# v30 is IV
# v31 - counter 1
#
# AES used,
#     vs0 - vs14 for round keys
#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
#
# This implementation uses stitched AES-GCM approach to improve overall performance.
# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
#
# Current large block (16384 bytes) performance per second with 128 bit key --
#
#                        Encrypt  Decrypt
# Power10[le] (3.5GHz)   5.32G    5.26G
#
# ===================================================================================
#
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

if ($flavour =~ /64/) {
    $SIZE_T=8;
    $LRSAVE=2*$SIZE_T;
    $STU="stdu";
    $POP="ld";
    $PUSH="std";
    $UCMP="cmpld";
    $SHRI="srdi";
} elsif ($flavour =~ /32/) {
    $SIZE_T=4;
    $LRSAVE=$SIZE_T;
    $STU="stwu";
    $POP="lwz";
    $PUSH="stw";
    $UCMP="cmplw";
    $SHRI="srwi";
} else { die "nonsense $flavour"; }

$sp="r1";
$FRAME=6*$SIZE_T+13*16;    # 13*16 is for v20-v31 offload

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

$code=<<___;
.machine        "any"
.text

# 4x loops
# v15 - v18 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle4x
    xxlor    19+32, 1, 1
    xxlor    20+32, 2, 2
    xxlor    21+32, 3, 3
    xxlor    22+32, 4, 4

    vcipher    15, 15, 19
    vcipher    16, 16, 19
    vcipher    17, 17, 19
    vcipher    18, 18, 19

    vcipher    15, 15, 20
    vcipher    16, 16, 20
    vcipher    17, 17, 20
    vcipher    18, 18, 20

    vcipher    15, 15, 21
    vcipher    16, 16, 21
    vcipher    17, 17, 21
    vcipher    18, 18, 21

    vcipher    15, 15, 22
    vcipher    16, 16, 22
    vcipher    17, 17, 22
    vcipher    18, 18, 22

    xxlor    19+32, 5, 5
    xxlor    20+32, 6, 6
    xxlor    21+32, 7, 7
    xxlor    22+32, 8, 8

    vcipher    15, 15, 19
    vcipher    16, 16, 19
    vcipher    17, 17, 19
    vcipher    18, 18, 19

    vcipher    15, 15, 20
    vcipher    16, 16, 20
    vcipher    17, 17, 20
    vcipher    18, 18, 20

    vcipher    15, 15, 21
    vcipher    16, 16, 21
    vcipher    17, 17, 21
    vcipher    18, 18, 21

    vcipher    15, 15, 22
    vcipher    16, 16, 22
    vcipher    17, 17, 22
    vcipher    18, 18, 22

    xxlor    23+32, 9, 9
    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
.endm

# 8x loops
# v15 - v22 - input states
# vs1 - vs9 - round keys
#
.macro Loop_aes_middle8x
    xxlor    23+32, 1, 1
    xxlor    24+32, 2, 2
    xxlor    25+32, 3, 3
    xxlor    26+32, 4, 4

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    vcipher    15, 15, 25
    vcipher    16, 16, 25
    vcipher    17, 17, 25
    vcipher    18, 18, 25
    vcipher    19, 19, 25
    vcipher    20, 20, 25
    vcipher    21, 21, 25
    vcipher    22, 22, 25

    vcipher    15, 15, 26
    vcipher    16, 16, 26
    vcipher    17, 17, 26
    vcipher    18, 18, 26
    vcipher    19, 19, 26
    vcipher    20, 20, 26
    vcipher    21, 21, 26
    vcipher    22, 22, 26

    xxlor    23+32, 5, 5
    xxlor    24+32, 6, 6
    xxlor    25+32, 7, 7
    xxlor    26+32, 8, 8

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    vcipher    15, 15, 25
    vcipher    16, 16, 25
    vcipher    17, 17, 25
    vcipher    18, 18, 25
    vcipher    19, 19, 25
    vcipher    20, 20, 25
    vcipher    21, 21, 25
    vcipher    22, 22, 25

    vcipher    15, 15, 26
    vcipher    16, 16, 26
    vcipher    17, 17, 26
    vcipher    18, 18, 26
    vcipher    19, 19, 26
    vcipher    20, 20, 26
    vcipher    21, 21, 26
    vcipher    22, 22, 26

    xxlor    23+32, 9, 9
    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23
.endm

#
# Compute 4x hash values based on Karatsuba method.
#
ppc_aes_gcm_ghash:
    vxor        15, 15, 0

    xxlxor        29, 29, 29

    vpmsumd        23, 12, 15        # H4.L * X.L
    vpmsumd        24, 9, 16
    vpmsumd        25, 6, 17
    vpmsumd        26, 3, 18

    vxor        23, 23, 24
    vxor        23, 23, 25
    vxor        23, 23, 26        # L

    vpmsumd        24, 13, 15        # H4.L * X.H + H4.H * X.L
    vpmsumd        25, 10, 16        # H3.L * X1.H + H3.H * X1.L
    vpmsumd        26, 7, 17
    vpmsumd        27, 4, 18

    vxor        24, 24, 25
    vxor        24, 24, 26
    vxor        24, 24, 27        # M

    # sum hash and reduction with H Poly
    vpmsumd        28, 23, 2        # reduction

    xxlor        29+32, 29, 29
    vsldoi        26, 24, 29, 8        # mL
    vsldoi        29, 29, 24, 8        # mH
    vxor        23, 23, 26        # mL + L

    vsldoi        23, 23, 23, 8        # swap
    vxor        23, 23, 28

    vpmsumd        24, 14, 15        # H4.H * X.H
    vpmsumd        25, 11, 16
    vpmsumd        26, 8, 17
    vpmsumd        27, 5, 18

    vxor        24, 24, 25
    vxor        24, 24, 26
    vxor        24, 24, 27

    vxor        24, 24, 29

    # sum hash and reduction with H Poly
    vsldoi        27, 23, 23, 8        # swap
    vpmsumd        23, 23, 2
    vxor        27, 27, 24
    vxor        23, 23, 27

    xxlor        32, 23+32, 23+32        # update hash

    blr

#
# Combine two 4x ghash
# v15 - v22 - input blocks
#
.macro ppc_aes_gcm_ghash2_4x
    # first 4x hash
    vxor        15, 15, 0        # Xi + X

    xxlxor        29, 29, 29

    vpmsumd        23, 12, 15        # H4.L * X.L
    vpmsumd        24, 9, 16
    vpmsumd        25, 6, 17
    vpmsumd        26, 3, 18

    vxor        23, 23, 24
    vxor        23, 23, 25
    vxor        23, 23, 26        # L

    vpmsumd        24, 13, 15        # H4.L * X.H + H4.H * X.L
    vpmsumd        25, 10, 16        # H3.L * X1.H + H3.H * X1.L
    vpmsumd        26, 7, 17
    vpmsumd        27, 4, 18

    vxor        24, 24, 25
    vxor        24, 24, 26

    # sum hash and reduction with H Poly
    vpmsumd        28, 23, 2        # reduction

    xxlor        29+32, 29, 29

    vxor        24, 24, 27        # M
    vsldoi        26, 24, 29, 8        # mL
    vsldoi        29, 29, 24, 8        # mH
    vxor        23, 23, 26        # mL + L

    vsldoi        23, 23, 23, 8        # swap
    vxor        23, 23, 28

    vpmsumd        24, 14, 15        # H4.H * X.H
    vpmsumd        25, 11, 16
    vpmsumd        26, 8, 17
    vpmsumd        27, 5, 18

    vxor        24, 24, 25
    vxor        24, 24, 26
    vxor        24, 24, 27        # H

    vxor        24, 24, 29        # H + mH

    # sum hash and reduction with H Poly
    vsldoi        27, 23, 23, 8        # swap
    vpmsumd        23, 23, 2
    vxor        27, 27, 24
    vxor        27, 23, 27        # 1st Xi

    # 2nd 4x hash
    vpmsumd        24, 9, 20
    vpmsumd        25, 6, 21
    vpmsumd        26, 3, 22
    vxor        19, 19, 27        # Xi + X
    vpmsumd        23, 12, 19        # H4.L * X.L

    vxor        23, 23, 24
    vxor        23, 23, 25
    vxor        23, 23, 26        # L

    vpmsumd        24, 13, 19        # H4.L * X.H + H4.H * X.L
    vpmsumd        25, 10, 20        # H3.L * X1.H + H3.H * X1.L
    vpmsumd        26, 7, 21
    vpmsumd        27, 4, 22

    vxor        24, 24, 25
    vxor        24, 24, 26

    # sum hash and reduction with H Poly
    vpmsumd        28, 23, 2        # reduction

    xxlor        29+32, 29, 29

    vxor        24, 24, 27        # M
    vsldoi        26, 24, 29, 8        # mL
    vsldoi        29, 29, 24, 8        # mH
    vxor        23, 23, 26        # mL + L

    vsldoi        23, 23, 23, 8        # swap
    vxor        23, 23, 28

    vpmsumd        24, 14, 19        # H4.H * X.H
    vpmsumd        25, 11, 20
    vpmsumd        26, 8, 21
    vpmsumd        27, 5, 22

    vxor        24, 24, 25
    vxor        24, 24, 26
    vxor        24, 24, 27        # H

    vxor        24, 24, 29        # H + mH

    # sum hash and reduction with H Poly
    vsldoi        27, 23, 23, 8        # swap
    vpmsumd        23, 23, 2
    vxor        27, 27, 24
    vxor        23, 23, 27

    xxlor        32, 23+32, 23+32        # update hash

.endm

#
# Compute update single hash
#
.macro ppc_update_hash_1x
    vxor        28, 28, 0

    vxor        19, 19, 19

    vpmsumd        22, 3, 28        # L
    vpmsumd        23, 4, 28        # M
    vpmsumd        24, 5, 28        # H

    vpmsumd        27, 22, 2        # reduction

    vsldoi        25, 23, 19, 8        # mL
    vsldoi        26, 19, 23, 8        # mH
    vxor        22, 22, 25        # LL + LL
    vxor        24, 24, 26        # HH + HH

    vsldoi        22, 22, 22, 8        # swap
    vxor        22, 22, 27

    vsldoi        20, 22, 22, 8        # swap
    vpmsumd        22, 22, 2        # reduction
    vxor        20, 20, 24
    vxor        22, 22, 20

    vmr        0, 22            # update hash

.endm

#
# ppc_aes_gcm_encrypt (const void *inp, void *out, size_t len,
#               const AES_KEY *key, unsigned char iv[16],
#               void *Xip);
#
#    r3 - inp
#    r4 - out
#    r5 - len
#    r6 - AES round keys
#    r7 - iv
#    r8 - Xi, HPoli, hash keys
#
.global ppc_aes_gcm_encrypt
.align 5
ppc_aes_gcm_encrypt:
_ppc_aes_gcm_encrypt:

    stdu 1,-512(1)
    mflr 0

    std    14,112(1)
    std    15,120(1)
    std    16,128(1)
    std    17,136(1)
    std    18,144(1)
    std    19,152(1)
    std    20,160(1)
    std    21,168(1)
    li    9, 256
    stvx    20, 9, 1
    addi    9, 9, 16
    stvx    21, 9, 1
    addi    9, 9, 16
    stvx    22, 9, 1
    addi    9, 9, 16
    stvx    23, 9, 1
    addi    9, 9, 16
    stvx    24, 9, 1
    addi    9, 9, 16
    stvx    25, 9, 1
    addi    9, 9, 16
    stvx    26, 9, 1
    addi    9, 9, 16
    stvx    27, 9, 1
    addi    9, 9, 16
    stvx    28, 9, 1
    addi    9, 9, 16
    stvx    29, 9, 1
    addi    9, 9, 16
    stvx    30, 9, 1
    addi    9, 9, 16
    stvx    31, 9, 1
    std    0, 528(1)

    # Load Xi
    lxvb16x    32, 0, 8    # load Xi

    # load Hash - h^4, h^3, h^2, h
    li    10, 32
    lxvd2x    2+32, 10, 8    # H Poli
    li    10, 48
    lxvd2x    3+32, 10, 8    # Hl
    li    10, 64
    lxvd2x    4+32, 10, 8    # H
    li    10, 80
    lxvd2x    5+32, 10, 8    # Hh

    li    10, 96
    lxvd2x    6+32, 10, 8    # H^2l
    li    10, 112
    lxvd2x    7+32, 10, 8    # H^2
    li    10, 128
    lxvd2x    8+32, 10, 8    # H^2h

    li    10, 144
    lxvd2x    9+32, 10, 8    # H^3l
    li    10, 160
    lxvd2x    10+32, 10, 8    # H^3
    li    10, 176
    lxvd2x    11+32, 10, 8    # H^3h

    li    10, 192
    lxvd2x    12+32, 10, 8    # H^4l
    li    10, 208
    lxvd2x    13+32, 10, 8    # H^4
    li    10, 224
    lxvd2x    14+32, 10, 8    # H^4h

    # initialize ICB: GHASH( IV ), IV - r7
    lxvb16x    30+32, 0, 7    # load IV  - v30

    mr    12, 5        # length
    li    11, 0        # block index

    # counter 1
    vxor    31, 31, 31
    vspltisb 22, 1
    vsldoi    31, 31, 22, 1    # counter 1

    # load round key to VSR
    lxv    0, 0(6)
    lxv    1, 0x10(6)
    lxv    2, 0x20(6)
    lxv    3, 0x30(6)
    lxv    4, 0x40(6)
    lxv    5, 0x50(6)
    lxv    6, 0x60(6)
    lxv    7, 0x70(6)
    lxv    8, 0x80(6)
    lxv    9, 0x90(6)
    lxv    10, 0xa0(6)

    # load rounds - 10 (128), 12 (192), 14 (256)
    lwz    9, 240(6)

    #
    # vxor    state, state, w # addroundkey
    xxlor    32+29, 0, 0
    vxor    15, 30, 29    # IV + round key - add round key 0

    cmpdi    9, 10
    beq    Loop_aes_gcm_8x

    # load 2 more round keys (v11, v12)
    lxv    11, 0xb0(6)
    lxv    12, 0xc0(6)

    cmpdi    9, 12
    beq    Loop_aes_gcm_8x

    # load 2 more round keys (v11, v12, v13, v14)
    lxv    13, 0xd0(6)
    lxv    14, 0xe0(6)
    cmpdi    9, 14
    beq    Loop_aes_gcm_8x

    b    aes_gcm_out

.align 5
Loop_aes_gcm_8x:
    mr    14, 3
    mr    9, 4

    # n blocks
    li    10, 128
    divdu    10, 5, 10    # n 128 bytes-blocks
    cmpdi    10, 0
    beq    Loop_last_block

    vaddudm    30, 30, 31    # IV + counter
    vxor    16, 30, 29
    vaddudm    30, 30, 31
    vxor    17, 30, 29
    vaddudm    30, 30, 31
    vxor    18, 30, 29
    vaddudm    30, 30, 31
    vxor    19, 30, 29
    vaddudm    30, 30, 31
    vxor    20, 30, 29
    vaddudm    30, 30, 31
    vxor    21, 30, 29
    vaddudm    30, 30, 31
    vxor    22, 30, 29

    mtctr    10

    li    15, 16
    li    16, 32
    li    17, 48
    li    18, 64
    li    19, 80
    li    20, 96
    li    21, 112

    lwz    10, 240(6)

Loop_8x_block:

    lxvb16x        15, 0, 14    # load block
    lxvb16x        16, 15, 14    # load block
    lxvb16x        17, 16, 14    # load block
    lxvb16x        18, 17, 14    # load block
    lxvb16x        19, 18, 14    # load block
    lxvb16x        20, 19, 14    # load block
    lxvb16x        21, 20, 14    # load block
    lxvb16x        22, 21, 14    # load block
    addi        14, 14, 128

    Loop_aes_middle8x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_next_ghash

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_next_ghash

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_next_ghash
    b    aes_gcm_out

Do_next_ghash:

    #
    # last round
    vcipherlast     15, 15, 23
    vcipherlast     16, 16, 23

    xxlxor        47, 47, 15
    stxvb16x        47, 0, 9    # store output
    xxlxor        48, 48, 16
    stxvb16x        48, 15, 9    # store output

    vcipherlast     17, 17, 23
    vcipherlast     18, 18, 23

    xxlxor        49, 49, 17
    stxvb16x        49, 16, 9    # store output
    xxlxor        50, 50, 18
    stxvb16x        50, 17, 9    # store output

    vcipherlast     19, 19, 23
    vcipherlast     20, 20, 23

    xxlxor        51, 51, 19
    stxvb16x        51, 18, 9    # store output
    xxlxor        52, 52, 20
    stxvb16x        52, 19, 9    # store output

    vcipherlast     21, 21, 23
    vcipherlast     22, 22, 23

    xxlxor        53, 53, 21
    stxvb16x        53, 20, 9    # store output
    xxlxor        54, 54, 22
    stxvb16x        54, 21, 9    # store output

    addi        9, 9, 128

    # ghash here
    ppc_aes_gcm_ghash2_4x

    xxlor    27+32, 0, 0
    vaddudm 30, 30, 31        # IV + counter
    vmr    29, 30
    vxor    15, 30, 27        # add round key
    vaddudm 30, 30, 31
    vxor    16, 30, 27
    vaddudm 30, 30, 31
    vxor    17, 30, 27
    vaddudm 30, 30, 31
    vxor    18, 30, 27
    vaddudm 30, 30, 31
    vxor    19, 30, 27
    vaddudm 30, 30, 31
    vxor    20, 30, 27
    vaddudm 30, 30, 31
    vxor    21, 30, 27
    vaddudm 30, 30, 31
    vxor    22, 30, 27

    addi    12, 12, -128
    addi    11, 11, 128

    bdnz    Loop_8x_block

    vmr    30, 29

Loop_last_block:
    cmpdi   12, 0
    beq     aes_gcm_out

    # loop last few blocks
    li      10, 16
    divdu   10, 12, 10

    mtctr   10

    lwz    10, 240(6)

    cmpdi   12, 16
    blt     Final_block

.macro Loop_aes_middle_1x
    xxlor    19+32, 1, 1
    xxlor    20+32, 2, 2
    xxlor    21+32, 3, 3
    xxlor    22+32, 4, 4

    vcipher 15, 15, 19
    vcipher 15, 15, 20
    vcipher 15, 15, 21
    vcipher 15, 15, 22

    xxlor    19+32, 5, 5
    xxlor    20+32, 6, 6
    xxlor    21+32, 7, 7
    xxlor    22+32, 8, 8

    vcipher 15, 15, 19
    vcipher 15, 15, 20
    vcipher 15, 15, 21
    vcipher 15, 15, 22

    xxlor    19+32, 9, 9
    vcipher 15, 15, 19
.endm

Next_rem_block:
    lxvb16x 15, 0, 14        # load block

    Loop_aes_middle_1x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_next_1x

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_next_1x

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_next_1x

Do_next_1x:
    vcipherlast     15, 15, 23

    xxlxor        47, 47, 15
    stxvb16x    47, 0, 9    # store output
    addi        14, 14, 16
    addi        9, 9, 16

    vmr        28, 15
    ppc_update_hash_1x

    addi        12, 12, -16
    addi        11, 11, 16
    xxlor        19+32, 0, 0
    vaddudm        30, 30, 31        # IV + counter
    vxor        15, 30, 19        # add round key

    bdnz    Next_rem_block

    cmpdi    12, 0
    beq    aes_gcm_out

Final_block:
    Loop_aes_middle_1x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_final_1x

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_final_1x

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_final_1x

Do_final_1x:
    vcipherlast     15, 15, 23

    lxvb16x    15, 0, 14        # load last block
    xxlxor    47, 47, 15

    # create partial block mask
    li    15, 16
    sub    15, 15, 12        # index to the mask

    vspltisb    16, -1        # first 16 bytes - 0xffff...ff
    vspltisb    17, 0        # second 16 bytes - 0x0000...00
    li    10, 192
    stvx    16, 10, 1
    addi    10, 10, 16
    stvx    17, 10, 1

    addi    10, 1, 192
    lxvb16x    16, 15, 10        # load partial block mask
    xxland    47, 47, 16

    vmr    28, 15
    ppc_update_hash_1x

    # * should store only the remaining bytes.
    bl    Write_partial_block

    b aes_gcm_out

#
# Write partial block
# r9 - output
# r12 - remaining bytes
# v15 - partial input data
#
Write_partial_block:
    li        10, 192
    stxvb16x    15+32, 10, 1        # last block

    #add        10, 9, 11        # Output
    addi        10, 9, -1
    addi        16, 1, 191

    mtctr        12            # remaining bytes
    li        15, 0

Write_last_byte:
    lbzu        14, 1(16)
    stbu        14, 1(10)
    bdnz        Write_last_byte
    blr

aes_gcm_out:
    # out = state
    stxvb16x    32, 0, 8        # write out Xi
    add    3, 11, 12        # return count

    li    9, 256
    lvx    20, 9, 1
    addi    9, 9, 16
    lvx    21, 9, 1
    addi    9, 9, 16
    lvx    22, 9, 1
    addi    9, 9, 16
    lvx    23, 9, 1
    addi    9, 9, 16
    lvx    24, 9, 1
    addi    9, 9, 16
    lvx    25, 9, 1
    addi    9, 9, 16
    lvx    26, 9, 1
    addi    9, 9, 16
    lvx    27, 9, 1
    addi    9, 9, 16
    lvx    28, 9, 1
    addi    9, 9, 16
    lvx    29, 9, 1
    addi    9, 9, 16
    lvx    30, 9, 1
    addi    9, 9, 16
    lvx    31, 9, 1

    ld    0, 528(1)
    ld      14,112(1)
    ld      15,120(1)
    ld      16,128(1)
    ld      17,136(1)
    ld      18,144(1)
    ld      19,152(1)
    ld      20,160(1)
    ld      21,168(1)

    mtlr    0
    addi    1, 1, 512
    blr

#
# 8x Decrypt
#
.global ppc_aes_gcm_decrypt
.align 5
ppc_aes_gcm_decrypt:
_ppc_aes_gcm_decrypt:

    stdu 1,-512(1)
    mflr 0

    std    14,112(1)
    std    15,120(1)
    std    16,128(1)
    std    17,136(1)
    std    18,144(1)
    std    19,152(1)
    std    20,160(1)
    std    21,168(1)
    li    9, 256
    stvx    20, 9, 1
    addi    9, 9, 16
    stvx    21, 9, 1
    addi    9, 9, 16
    stvx    22, 9, 1
    addi    9, 9, 16
    stvx    23, 9, 1
    addi    9, 9, 16
    stvx    24, 9, 1
    addi    9, 9, 16
    stvx    25, 9, 1
    addi    9, 9, 16
    stvx    26, 9, 1
    addi    9, 9, 16
    stvx    27, 9, 1
    addi    9, 9, 16
    stvx    28, 9, 1
    addi    9, 9, 16
    stvx    29, 9, 1
    addi    9, 9, 16
    stvx    30, 9, 1
    addi    9, 9, 16
    stvx    31, 9, 1
    std    0, 528(1)

    # Load Xi
    lxvb16x    32, 0, 8    # load Xi

    # load Hash - h^4, h^3, h^2, h
    li    10, 32
    lxvd2x    2+32, 10, 8    # H Poli
    li    10, 48
    lxvd2x    3+32, 10, 8    # Hl
    li    10, 64
    lxvd2x    4+32, 10, 8    # H
    li    10, 80
    lxvd2x    5+32, 10, 8    # Hh

    li    10, 96
    lxvd2x    6+32, 10, 8    # H^2l
    li    10, 112
    lxvd2x    7+32, 10, 8    # H^2
    li    10, 128
    lxvd2x    8+32, 10, 8    # H^2h

    li    10, 144
    lxvd2x    9+32, 10, 8    # H^3l
    li    10, 160
    lxvd2x    10+32, 10, 8    # H^3
    li    10, 176
    lxvd2x    11+32, 10, 8    # H^3h

    li    10, 192
    lxvd2x    12+32, 10, 8    # H^4l
    li    10, 208
    lxvd2x    13+32, 10, 8    # H^4
    li    10, 224
    lxvd2x    14+32, 10, 8    # H^4h

    # initialize ICB: GHASH( IV ), IV - r7
    lxvb16x    30+32, 0, 7    # load IV  - v30

    mr    12, 5        # length
    li    11, 0        # block index

    # counter 1
    vxor    31, 31, 31
    vspltisb 22, 1
    vsldoi    31, 31, 22, 1    # counter 1

    # load round key to VSR
    lxv    0, 0(6)
    lxv    1, 0x10(6)
    lxv    2, 0x20(6)
    lxv    3, 0x30(6)
    lxv    4, 0x40(6)
    lxv    5, 0x50(6)
    lxv    6, 0x60(6)
    lxv    7, 0x70(6)
    lxv    8, 0x80(6)
    lxv    9, 0x90(6)
    lxv    10, 0xa0(6)

    # load rounds - 10 (128), 12 (192), 14 (256)
    lwz    9, 240(6)

    #
    # vxor    state, state, w # addroundkey
    xxlor    32+29, 0, 0
    vxor    15, 30, 29    # IV + round key - add round key 0

    cmpdi    9, 10
    beq    Loop_aes_gcm_8x_dec

    # load 2 more round keys (v11, v12)
    lxv    11, 0xb0(6)
    lxv    12, 0xc0(6)

    cmpdi    9, 12
    beq    Loop_aes_gcm_8x_dec

    # load 2 more round keys (v11, v12, v13, v14)
    lxv    13, 0xd0(6)
    lxv    14, 0xe0(6)
    cmpdi    9, 14
    beq    Loop_aes_gcm_8x_dec

    b    aes_gcm_out

.align 5
Loop_aes_gcm_8x_dec:
    mr    14, 3
    mr    9, 4

    # n blocks
    li    10, 128
    divdu    10, 5, 10    # n 128 bytes-blocks
    cmpdi    10, 0
    beq    Loop_last_block_dec

    vaddudm    30, 30, 31    # IV + counter
    vxor    16, 30, 29
    vaddudm    30, 30, 31
    vxor    17, 30, 29
    vaddudm    30, 30, 31
    vxor    18, 30, 29
    vaddudm    30, 30, 31
    vxor    19, 30, 29
    vaddudm    30, 30, 31
    vxor    20, 30, 29
    vaddudm    30, 30, 31
    vxor    21, 30, 29
    vaddudm    30, 30, 31
    vxor    22, 30, 29

    mtctr    10

    li    15, 16
    li    16, 32
    li    17, 48
    li    18, 64
    li    19, 80
    li    20, 96
    li    21, 112

    lwz    10, 240(6)

Loop_8x_block_dec:

    lxvb16x        15, 0, 14    # load block
    lxvb16x        16, 15, 14    # load block
    lxvb16x        17, 16, 14    # load block
    lxvb16x        18, 17, 14    # load block
    lxvb16x        19, 18, 14    # load block
    lxvb16x        20, 19, 14    # load block
    lxvb16x        21, 20, 14    # load block
    lxvb16x        22, 21, 14    # load block
    addi        14, 14, 128

    Loop_aes_middle8x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_last_aes_dec

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_last_aes_dec

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    16, 16, 23
    vcipher    17, 17, 23
    vcipher    18, 18, 23
    vcipher    19, 19, 23
    vcipher    20, 20, 23
    vcipher    21, 21, 23
    vcipher    22, 22, 23

    vcipher    15, 15, 24
    vcipher    16, 16, 24
    vcipher    17, 17, 24
    vcipher    18, 18, 24
    vcipher    19, 19, 24
    vcipher    20, 20, 24
    vcipher    21, 21, 24
    vcipher    22, 22, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_last_aes_dec
    b    aes_gcm_out

Do_last_aes_dec:

    #
    # last round
    vcipherlast     15, 15, 23
    vcipherlast     16, 16, 23

    xxlxor        47, 47, 15
    stxvb16x        47, 0, 9    # store output
    xxlxor        48, 48, 16
    stxvb16x        48, 15, 9    # store output

    vcipherlast     17, 17, 23
    vcipherlast     18, 18, 23

    xxlxor        49, 49, 17
    stxvb16x        49, 16, 9    # store output
    xxlxor        50, 50, 18
    stxvb16x        50, 17, 9    # store output

    vcipherlast     19, 19, 23
    vcipherlast     20, 20, 23

    xxlxor        51, 51, 19
    stxvb16x        51, 18, 9    # store output
    xxlxor        52, 52, 20
    stxvb16x        52, 19, 9    # store output

    vcipherlast     21, 21, 23
    vcipherlast     22, 22, 23

    xxlxor        53, 53, 21
    stxvb16x        53, 20, 9    # store output
    xxlxor        54, 54, 22
    stxvb16x        54, 21, 9    # store output

    addi        9, 9, 128

    xxlor    15+32, 15, 15
    xxlor    16+32, 16, 16
    xxlor    17+32, 17, 17
    xxlor    18+32, 18, 18
    xxlor    19+32, 19, 19
    xxlor    20+32, 20, 20
    xxlor    21+32, 21, 21
    xxlor    22+32, 22, 22

    # ghash here
    ppc_aes_gcm_ghash2_4x

    xxlor    27+32, 0, 0
    vaddudm 30, 30, 31        # IV + counter
    vmr    29, 30
    vxor    15, 30, 27        # add round key
    vaddudm 30, 30, 31
    vxor    16, 30, 27
    vaddudm 30, 30, 31
    vxor    17, 30, 27
    vaddudm 30, 30, 31
    vxor    18, 30, 27
    vaddudm 30, 30, 31
    vxor    19, 30, 27
    vaddudm 30, 30, 31
    vxor    20, 30, 27
    vaddudm 30, 30, 31
    vxor    21, 30, 27
    vaddudm 30, 30, 31
    vxor    22, 30, 27
    addi    12, 12, -128
    addi    11, 11, 128

    bdnz    Loop_8x_block_dec

    vmr    30, 29

Loop_last_block_dec:
    cmpdi   12, 0
    beq     aes_gcm_out

    # loop last few blocks
    li      10, 16
    divdu   10, 12, 10

    mtctr   10

    lwz    10, 240(6)

    cmpdi   12, 16
    blt     Final_block_dec

Next_rem_block_dec:
    lxvb16x 15, 0, 14        # load block

    Loop_aes_middle_1x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_next_1x_dec

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_next_1x_dec

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_next_1x_dec

Do_next_1x_dec:
    vcipherlast     15, 15, 23

    xxlxor        47, 47, 15
    stxvb16x    47, 0, 9    # store output
    addi        14, 14, 16
    addi        9, 9, 16

    xxlor    28+32, 15, 15
    ppc_update_hash_1x

    addi        12, 12, -16
    addi        11, 11, 16
    xxlor        19+32, 0, 0
    vaddudm        30, 30, 31        # IV + counter
    vxor        15, 30, 19        # add round key

    bdnz    Next_rem_block_dec

    cmpdi    12, 0
    beq    aes_gcm_out

Final_block_dec:
    Loop_aes_middle_1x

    xxlor    23+32, 10, 10

    cmpdi    10, 10
    beq    Do_final_1x_dec

    # 192 bits
    xxlor    24+32, 11, 11

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 12, 12

    cmpdi    10, 12
    beq    Do_final_1x_dec

    # 256 bits
    xxlor    24+32, 13, 13

    vcipher    15, 15, 23
    vcipher    15, 15, 24

    xxlor    23+32, 14, 14

    cmpdi    10, 14
    beq    Do_final_1x_dec

Do_final_1x_dec:
    vcipherlast     15, 15, 23

    lxvb16x    15, 0, 14        # load block
    xxlxor    47, 47, 15

    # create partial block mask
    li    15, 16
    sub    15, 15, 12        # index to the mask

    vspltisb    16, -1        # first 16 bytes - 0xffff...ff
    vspltisb    17, 0        # second 16 bytes - 0x0000...00
    li    10, 192
    stvx    16, 10, 1
    addi    10, 10, 16
    stvx    17, 10, 1

    addi    10, 1, 192
    lxvb16x    16, 15, 10        # load block mask
    xxland    47, 47, 16

    xxlor    28+32, 15, 15
    ppc_update_hash_1x

    # * should store only the remaining bytes.
    bl    Write_partial_block

    b aes_gcm_out


___

foreach (split("\n",$code)) {
    s/\`([^\`]*)\`/eval $1/geo;

    if ($flavour =~ /le$/o) {    # little-endian
        s/le\?//o        or
        s/be\?/#be#/o;
    } else {
        s/le\?/#le#/o    or
        s/be\?//o;
    }
    print $_,"\n";
}

close STDOUT or die "error closing STDOUT: $!"; # enforce flush