1#! /usr/bin/env perl 2# Copyright 2020-2023 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10#======================================================================== 11# Written by Xiaokang Qian <xiaokang.qian@arm.com> for the OpenSSL project, 12# derived from https://github.com/ARM-software/AArch64cryptolib, original 13# author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual 14# licensed under OpenSSL and SPDX BSD-3-Clause licenses depending on where you 15# obtain it. 16#======================================================================== 17# 18# Approach - We want to reload constants as we have plenty of spare ASIMD slots around crypto units for loading 19# Unroll x8 in main loop, main loop to act on 8 16B blocks per iteration, and then do modulo of the accumulated 20# intermediate hashes from the 8 blocks. 
21# 22# ____________________________________________________ 23# | | 24# | PRE | 25# |____________________________________________________| 26# | | | | 27# | CTR block 8k+13| AES block 8k+8 | GHASH block 8k+0 | 28# |________________|________________|__________________| 29# | | | | 30# | CTR block 8k+14| AES block 8k+9 | GHASH block 8k+1 | 31# |________________|________________|__________________| 32# | | | | 33# | CTR block 8k+15| AES block 8k+10| GHASH block 8k+2 | 34# |________________|________________|__________________| 35# | | | | 36# | CTR block 8k+16| AES block 8k+11| GHASH block 8k+3 | 37# |________________|________________|__________________| 38# | | | | 39# | CTR block 8k+17| AES block 8k+12| GHASH block 8k+4 | 40# |________________|________________|__________________| 41# | | | | 42# | CTR block 8k+18| AES block 8k+13| GHASH block 8k+5 | 43# |________________|________________|__________________| 44# | | | | 45# | CTR block 8k+19| AES block 8k+14| GHASH block 8k+6 | 46# |________________|________________|__________________| 47# | | | | 48# | CTR block 8k+20| AES block 8k+15| GHASH block 8k+7 | 49# |________________|____(mostly)____|__________________| 50# | | 51# | MODULO | 52# |____________________________________________________| 53# 54# PRE: 55# Ensure previously generated intermediate hash is aligned and merged with result for GHASH 8k+0 56# EXT low_acc, low_acc, low_acc, #8 57# EOR res_curr (8k+0), res_curr (8k+0), low_acc 58# 59# CTR block: 60# Increment and byte reverse counter in scalar registers and transfer to SIMD registers 61# REV ctr32, rev_ctr32 62# ORR ctr64, constctr96_top32, ctr32, LSL #32 63# INS ctr_next.d[0], constctr96_bottom64 // Keeping this in scalar registers to free up space in SIMD RF 64# INS ctr_next.d[1], ctr64X 65# ADD rev_ctr32, #1 66# 67# AES block: 68# Do AES encryption/decryption on CTR block X and EOR it with input block X. Take a 256-bit key below for example. 
69# Doing small trick here of loading input in scalar registers, EORing with last key and then transferring 70# Given we are very constrained in our ASIMD registers this is quite important 71# 72# Encrypt: 73# LDR input_low, [ input_ptr ], #8 74# LDR input_high, [ input_ptr ], #8 75# EOR input_low, k14_low 76# EOR input_high, k14_high 77# INS res_curr.d[0], input_low 78# INS res_curr.d[1], input_high 79# AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr 80# AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr 81# AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr 82# AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr 83# AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr 84# AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr 85# AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr 86# AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr 87# AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr 88# AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr 89# AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr 90# AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr 91# AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr 92# AESE ctr_curr, k13 93# EOR res_curr, res_curr, ctr_curr 94# ST1 { res_curr.16b }, [ output_ptr ], #16 95# 96# Decrypt: 97# AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr 98# AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr 99# AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr 100# AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr 101# AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr 102# AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr 103# AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr 104# AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr 105# AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr 106# AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr 107# AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr 108# AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr 109# AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr 110# AESE ctr_curr, k13 111# LDR res_curr, [ input_ptr ], #16 112# EOR res_curr, res_curr, ctr_curr 113# MOV output_low, res_curr.d[0] 114# MOV output_high, res_curr.d[1] 115# EOR output_low, k14_low 116# EOR 
output_high, k14_high 117# STP output_low, output_high, [ output_ptr ], #16 118 119# GHASH block X: 120# Do 128b karatsuba polynomial multiplication on block 121# We only have 64b->128b polynomial multipliers, naively that means we need to do 4 64b multiplies to generate a 128b 122# 123# multiplication: 124# Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64 125# 126# The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies: 127# Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64 128# 129# There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are 130# multiplying with "twisted" powers of H 131# 132# Note: We can PMULL directly into the acc_x in first GHASH of the loop 133# Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical 134# path latency dominates the performance 135# 136# This has a knock on effect on register pressure, so we have to be a bit more clever with our temporary registers 137# than indicated here 138# REV64 res_curr, res_curr 139# INS t_m.d[0], res_curr.d[1] 140# EOR t_m.8B, t_m.8B, res_curr.8B 141# PMULL2 t_h, res_curr, HX 142# PMULL t_l, res_curr, HX 143# PMULL t_m, t_m, HX_k 144# EOR acc_h, acc_h, t_h 145# EOR acc_l, acc_l, t_l 146# EOR acc_m, acc_m, t_m 147# 148# MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them 149# There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo 150# with a reversed constant 151# EOR3 acc_m, acc_m, acc_l, acc_h // Finish off karatsuba processing 152# PMULL t_mod, acc_h, mod_constant 153# EXT acc_h, acc_h, acc_h, #8 154# EOR3 acc_m, acc_m, t_mod, acc_h 155# PMULL acc_h, acc_m, mod_constant 156# EXT acc_m, acc_m, acc_m, #8 157# 
#       EOR3     acc_l, acc_l, acc_m, acc_h

# Command line handling:
#   $output  - the last argument, if it looks like a file (has an extension)
#   $flavour - the first argument, if it does not look like a file (no '.')
# Either may end up undefined.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator: first in the directory this script was
# invoked from, then under ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

die "only for 64 bit" if $flavour !~ /64/;

# From here on everything written to STDOUT is piped through arm-xlate.pl.
# Check the piped open: if the translator cannot be started the script must
# fail loudly instead of silently emitting nothing.
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Prologue of the generated assembly; later sections append to $code.
$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=8
___
$code.=".arch	armv8-a+crypto\n.text\n";

# General-purpose register aliases used throughout the generated kernels.
$input_ptr="x0";  #argument block
$bit_length="x1";        # input length in bits (the kernel shifts it right by 3 to get bytes)
$byte_length="x9";       # receives $bit_length >> 3
$output_ptr="x2";
$current_tag="x3";       # base pointer for the GHASH accumulator and the H-power loads
$counter="x16";          # the kernel copies x4 here before loading CTR block 0 from it
$constant_temp="x15";    # scratch for building constants (e.g. the counter increment)
$modulo_constant="x10";  # address of the 0xc2... reduction constant spilled to the stack
$cc="x8";                # the kernel copies x5 here; base pointer for the round-key loads
{
# x4..x7: x4 and x5 hold incoming arguments on entry; the kernel moves them to
# $counter/$cc before these aliases are written.
my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
my ($temp2_x,$temp3_x)=map("x$_",(13..14));

# v0..v7 hold the eight counter blocks, v8..v15 the eight result blocks; the
# b/d/q suffixed names are the same registers in other operand forms.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));

# NOTE: the $ctr_t* temporaries alias the very same registers (v8..v15) as
# $res0..$res7 above.
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));

# GHASH accumulators (high / middle / low) live in v17..v19.
my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));

# Hash-key powers H1..H4 and their Karatsuba "k" companions occupy v20..v25.
my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
my
($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25)); 206my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25)); 207my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25)); 208 209my $t0="v16"; 210my $t0d="d16"; 211 212my $t1="v29"; 213my $t2=$res1; 214my $t3=$t1; 215 216my $t4=$res0; 217my $t5=$res2; 218my $t6=$t0; 219 220my $t7=$res3; 221my $t8=$res4; 222my $t9=$res5; 223 224my $t10=$res6; 225my $t11="v21"; 226my $t12=$t1; 227 228my $rtmp_ctr="v30"; 229my $rtmp_ctrq="q30"; 230my $rctr_inc="v31"; 231my $rctr_incd="d31"; 232 233my $mod_constantd=$t0d; 234my $mod_constant=$t0; 235 236my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28)); 237my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28)); 238my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28)); 239my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28)); 240my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28)); 241my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28)); 242my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28)); 243my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28)); 244my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28)); 245my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28)); 246my $rk2q1="v28.1q"; 247my $rk3q1="v26.1q"; 248my $rk4v="v27"; 249 250 251######################################################################################### 252# size_t unroll8_eor3_aes_gcm_enc_128_kernel(const uint8_t * plaintext, 253# uint64_t plaintext_length, 254# uint8_t * ciphertext, 255# uint64_t *Xi, 256# unsigned char ivec[16], 257# const void *key); 258# 259$code.=<<___; 260.global unroll8_eor3_aes_gcm_enc_128_kernel 261.type unroll8_eor3_aes_gcm_enc_128_kernel,%function 262.align 4 263unroll8_eor3_aes_gcm_enc_128_kernel: 264 AARCH64_VALID_CALL_TARGET 265 cbz x1, .L128_enc_ret 266 stp d8, d9, [sp, #-80]! 
267 lsr $byte_length, $bit_length, #3 268 mov $counter, x4 269 mov $cc, x5 270 stp d10, d11, [sp, #16] 271 stp d12, d13, [sp, #32] 272 stp d14, d15, [sp, #48] 273 mov x5, #0xc200000000000000 274 stp x5, xzr, [sp, #64] 275 add $modulo_constant, sp, #64 276 277 mov $constant_temp, #0x100000000 @ set up counter increment 278 movi $rctr_inc.16b, #0x0 279 mov $rctr_inc.d[1], $constant_temp 280 mov $main_end_input_ptr, $byte_length 281 ld1 { $ctr0b}, [$counter] @ CTR block 0 282 283 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 284 285 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 286 287 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 288 289 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 290 291 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 292 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 293 294 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 295 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 296 297 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 298 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 299 300 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 301 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 302 303 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 304 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 305 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 306 307 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 308 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 309 310 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 311 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 312 313 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 314 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 315 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 316 317 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 318 aese 
$ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 319 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 320 321 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 322 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 323 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 324 325 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 326 327 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 328 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 329 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 330 331 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 332 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 333 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 334 335 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 336 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 337 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 338 339 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 340 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 341 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 342 343 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 344 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 345 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 346 347 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 348 349 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 350 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 351 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 352 353 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 354 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 355 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 356 357 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b 
@ AES block 7 - round 3 358 359 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 360 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 361 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 362 363 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 364 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 365 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 366 367 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 368 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 369 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 370 371 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 372 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 373 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 374 375 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 376 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 377 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 378 379 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 380 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 381 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 382 383 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 384 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 385 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 386 387 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 388 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 389 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 390 391 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 392 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 393 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 394 395 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 396 397 ld1 { 
$acc_lb}, [$current_tag] 398 ext $acc_lb, $acc_lb, $acc_lb, #8 399 rev64 $acc_lb, $acc_lb 400 401 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 402 403 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 404 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 405 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 406 407 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 408 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 409 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 410 411 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 412 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 413 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 414 415 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 416 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 417 ldr $rk10q, [$cc, #160] @ load rk10 418 419 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 420 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 421 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 422 423 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 424 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 425 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 426 427 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 428 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 429 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 430 431 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 432 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 433 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 434 435 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 436 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 437 b.ge .L128_enc_tail @ handle tail 438 439 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 440 441 ldp $ctr_t2q, 
$ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext 442 443 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 444 445 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 446 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 447 448 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 0 - result 449 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 450 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 451 452 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 1 - result 453 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 454 455 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 456 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result 457 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 458 459 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 2 - result 460 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result 461 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result 462 463 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 464 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 465 466 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 3 - result 467 eor3 $res7b, $ctr_t7b, $ctr7b,$rk10 @ AES block 7 - result 468 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 469 470 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 471 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 472 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 473 474 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 475 476 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 477 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 478 b.ge .L128_enc_prepretail @ do prepretail 479 480.L128_enc_main_loop: @ main loop start 481 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 482 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 483 ext $h5.16b, $h5.16b, $h5.16b, #8 484 ldr $h6q, [$current_tag, #160] @ load h6l | 
h6h 485 ext $h6.16b, $h6.16b, $h6.16b, #8 486 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 487 488 rev64 $res1b, $res1b @ GHASH block 8k+1 489 rev64 $res0b, $res0b @ GHASH block 8k 490 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 491 ext $h7.16b, $h7.16b, $h7.16b, #8 492 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 493 ext $h8.16b, $h8.16b, $h8.16b, #8 494 495 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 496 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 497 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 498 499 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 500 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 501 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 502 rev64 $res3b, $res3b @ GHASH block 8k+3 503 504 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 505 eor $res0b, $res0b, $acc_lb @ PRE 1 506 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 507 508 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 509 510 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 511 rev64 $res2b, $res2b @ GHASH block 8k+2 512 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 513 514 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 515 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 516 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 517 518 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 519 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 520 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 521 522 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 523 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 524 ext $h3.16b, $h3.16b, $h3.16b, #8 525 ldr $h4q, [$current_tag, #112] @ load h3l | h3h 526 ext $h4.16b, $h4.16b, $h4.16b, #8 527 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 528 529 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 530 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES 
block 8k+12 - round 0 531 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 532 533 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 534 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 535 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 536 537 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 538 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 539 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 540 541 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 542 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 543 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 544 545 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 546 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 547 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 548 549 eor3 $acc_hb, $acc_hb, $t1.16b,$t2.16b @ GHASH block 8k+2, 8k+3 - high 550 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 551 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 552 553 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 554 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 555 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 556 557 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 558 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 559 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 560 561 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 562 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 563 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 564 565 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 566 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 567 568 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 
569 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 570 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 571 572 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 573 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 574 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 575 576 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 577 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 578 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 579 580 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 581 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 582 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 583 584 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 585 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 586 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 587 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 588 589 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 590 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 591 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 592 593 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 594 ext $h1.16b, $h1.16b, $h1.16b, #8 595 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 596 ext $h2.16b, $h2.16b, $h2.16b, #8 597 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 598 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 599 600 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 601 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 602 603 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 604 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 605 606 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 607 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 
3 608 609 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 610 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 611 612 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 613 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 614 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 615 616 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 617 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 618 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 619 620 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 621 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 622 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 623 624 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 625 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 626 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 627 628 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 629 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 630 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 631 632 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 633 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 634 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 635 636 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 637 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 638 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 639 640 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 641 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 642 643 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 644 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 645 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 646 647 aese 
$ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 648 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 649 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 650 651 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 652 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 653 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 654 655 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 656 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 657 658 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 659 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 660 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 661 662 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 663 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 664 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 665 666 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 667 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 668 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 669 670 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 671 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 672 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 673 674 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 675 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 676 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 677 678 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 679 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 680 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 681 682 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 683 aese $ctr6b, $rk7 \n aesmc 
$ctr6b, $ctr6b @ AES block 8k+14 - round 7 684 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 685 686 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 687 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 688 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 689 690 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 691 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 692 693 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 694 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext 695 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 696 697 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 698 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 699 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 700 701 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 702 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 703 ldr $rk10q, [$cc, #160] @ load rk10 704 705 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 706 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 707 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 708 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 709 710 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 711 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 712 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 713 714 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 715 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 716 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 717 718 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext 719 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 720 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 721 722 cmp 
$input_ptr, $main_end_input_ptr @ LOOP CONTROL 723 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result 724 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 725 726 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 727 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 728 729 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 8k+10 - result 730 731 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 732 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 733 734 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 735 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 736 737 eor3 $res7b, $ctr_t7b, $ctr7b, $rk10 @ AES block 7 - result 738 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 739 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 740 741 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 8k+9 - result 742 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 8k+11 - result 743 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 744 745 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 746 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result 747 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 748 749 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 8k+8 - result 750 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 751 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 752 753 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 754 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result 755 756 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 757 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 758 759 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 760 b.lt .L128_enc_main_loop 761 762.L128_enc_prepretail: @ PREPRETAIL 763 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 764 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 765 ext $h7.16b, $h7.16b, $h7.16b, #8 766 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 767 ext 
$h8.16b, $h8.16b, $h8.16b, #8 768 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 769 770 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 771 ext $h5.16b, $h5.16b, $h5.16b, #8 772 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 773 ext $h6.16b, $h6.16b, $h6.16b, #8 774 rev64 $res0b, $res0b @ GHASH block 8k 775 rev64 $res1b, $res1b @ GHASH block 8k+1 776 777 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 778 ldr $h78kq, [$current_tag, #192] @ load h6k | h5k 779 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 780 rev64 $res3b, $res3b @ GHASH block 8k+3 781 782 rev64 $res2b, $res2b @ GHASH block 8k+2 783 eor $res0b, $res0b, $acc_lb @ PRE 1 784 785 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 786 787 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 788 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 789 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 790 791 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 792 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 793 794 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 795 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 796 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 797 798 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 799 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 800 801 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 802 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 803 804 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 805 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 806 807 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 808 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 809 810 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 811 812 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 813 814 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 815 816 aese $ctr2b, $rk0 \n aesmc $ctr2b, 
$ctr2b @ AES block 8k+10 - round 0 817 818 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 819 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 820 821 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 822 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 823 824 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 825 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 826 827 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 828 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 829 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 830 831 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 832 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 833 834 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 835 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 836 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 837 838 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 839 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 840 841 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 842 ext $h3.16b, $h3.16b, $h3.16b, #8 843 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 844 ext $h4.16b, $h4.16b, $h4.16b, #8 845 846 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 847 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 848 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 849 850 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 851 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 852 853 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 854 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 855 856 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 857 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 858 ldr $h34kq, 
[$current_tag, #96] @ load h4k | h3k 859 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 860 861 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 862 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 863 864 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 865 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 866 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 867 868 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 869 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 870 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 871 872 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 873 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 874 875 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 876 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 877 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 878 879 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 880 ext $h1.16b, $h1.16b, $h1.16b, #8 881 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 882 ext $h2.16b, $h2.16b, $h2.16b, #8 883 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 884 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 885 886 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 887 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 888 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 889 890 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 891 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 892 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 893 894 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 895 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 896 897 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES 
block 8k+15 - round 3 898 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 899 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 900 901 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 902 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 903 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 904 905 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 906 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 907 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 908 909 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 910 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 911 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 912 913 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 914 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 915 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 916 917 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 918 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 919 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 920 921 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 922 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 923 924 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 925 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 926 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 927 928 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 929 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 930 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 931 932 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 933 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 934 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 935 936 aese $ctr0b, 
$rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 937 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 938 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 939 940 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 941 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 942 943 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 944 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 945 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 946 947 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 948 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 949 950 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 951 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 952 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 953 954 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 955 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 956 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 957 958 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 959 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 960 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 961 962 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 963 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 964 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 965 966 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 967 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 968 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 969 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 970 971 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 
972 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 973 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 974 975 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 976 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 977 978 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 979 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 980 981 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 982 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 983 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 984 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 985 986 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 987 eor3 $acc_lb, $acc_lb, $acc_hb, $acc_mb @ MODULO - fold into low 988 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 989 990 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 991 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 992 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 993 994 ldr $rk10q, [$cc, #160] @ load rk10 995 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 996 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 997 998 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 999 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 1000 1001 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 1002 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 1003 1004 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 1005 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 1006.L128_enc_tail: @ TAIL 1007 1008 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 1009 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext 1010 1011 mov $t1.16b, $rk10 1012 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 1013 ext $h5.16b, $h5.16b, $h5.16b, 
#8 1014 1015 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result 1016 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 1017 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 1018 ext $h6.16b, $h6.16b, $h6.16b, #8 1019 ext $h7.16b, $h7.16b, $h7.16b, #8 1020 1021 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 1022 ext $h8.16b, $h8.16b, $h8.16b, #8 1023 cmp $main_end_input_ptr, #112 1024 b.gt .L128_enc_blocks_more_than_7 1025 1026 mov $ctr7b, $ctr6b 1027 mov $ctr6b, $ctr5b 1028 movi $acc_h.8b, #0 1029 1030 cmp $main_end_input_ptr, #96 1031 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1032 mov $ctr5b, $ctr4b 1033 1034 mov $ctr4b, $ctr3b 1035 mov $ctr3b, $ctr2b 1036 mov $ctr2b, $ctr1b 1037 1038 movi $acc_l.8b, #0 1039 movi $acc_m.8b, #0 1040 b.gt .L128_enc_blocks_more_than_6 1041 1042 mov $ctr7b, $ctr6b 1043 cmp $main_end_input_ptr, #80 1044 1045 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1046 mov $ctr6b, $ctr5b 1047 mov $ctr5b, $ctr4b 1048 1049 mov $ctr4b, $ctr3b 1050 mov $ctr3b, $ctr1b 1051 b.gt .L128_enc_blocks_more_than_5 1052 1053 cmp $main_end_input_ptr, #64 1054 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1055 1056 mov $ctr7b, $ctr6b 1057 mov $ctr6b, $ctr5b 1058 1059 mov $ctr5b, $ctr4b 1060 mov $ctr4b, $ctr1b 1061 b.gt .L128_enc_blocks_more_than_4 1062 1063 mov $ctr7b, $ctr6b 1064 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1065 mov $ctr6b, $ctr5b 1066 1067 mov $ctr5b, $ctr1b 1068 cmp $main_end_input_ptr, #48 1069 b.gt .L128_enc_blocks_more_than_3 1070 1071 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1072 mov $ctr7b, $ctr6b 1073 mov $ctr6b, $ctr1b 1074 1075 cmp $main_end_input_ptr, #32 1076 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1077 b.gt .L128_enc_blocks_more_than_2 1078 1079 cmp $main_end_input_ptr, #16 1080 1081 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1082 mov $ctr7b, $ctr1b 1083 b.gt .L128_enc_blocks_more_than_1 1084 1085 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1086 sub $rtmp_ctr.4s, 
$rtmp_ctr.4s, $rctr_inc.4s 1087 b .L128_enc_blocks_less_than_1 1088.L128_enc_blocks_more_than_7: @ blocks left > 7 1089 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 1090 1091 rev64 $res0b, $res1b @ GHASH final-7 block 1092 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 1093 1094 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1095 1096 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 1097 1098 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 1099 1100 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 1101 1102 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 1103 movi $t0.8b, #0 @ suppress further partial tag feed in 1104 1105 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 1106 1107 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 1108 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 1109.L128_enc_blocks_more_than_6: @ blocks left > 6 1110 1111 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 1112 1113 rev64 $res0b, $res1b @ GHASH final-6 block 1114 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 1115 1116 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1117 1118 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 1119 1120 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 1121 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 1122 1123 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 1124 movi $t0.8b, #0 @ suppress further partial tag feed in 1125 1126 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 1127 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 1128 1129 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 1130 1131 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 1132 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 1133.L128_enc_blocks_more_than_5: @ blocks left > 5 1134 1135 st1 { $res1b}, 
[$output_ptr], #16 @ AES final-5 block - store result 1136 1137 rev64 $res0b, $res1b @ GHASH final-5 block 1138 1139 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1140 1141 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 1142 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 1143 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 1144 1145 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 1146 1147 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 1148 1149 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 1150 1151 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 1152 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 1153 movi $t0.8b, #0 @ suppress further partial tag feed in 1154 1155 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 1156 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 1157 1158 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 1159.L128_enc_blocks_more_than_4: @ blocks left > 4 1160 1161 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 1162 1163 rev64 $res0b, $res1b @ GHASH final-4 block 1164 1165 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 1166 1167 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1168 1169 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 1170 movi $t0.8b, #0 @ suppress further partial tag feed in 1171 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 1172 1173 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 1174 1175 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 1176 1177 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 1178 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 1179 1180 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 1181 1182 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 1183 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 
1184.L128_enc_blocks_more_than_3: @ blocks left > 3 1185 1186 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 1187 1188 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1189 ext $h4.16b, $h4.16b, $h4.16b, #8 1190 1191 rev64 $res0b, $res1b @ GHASH final-3 block 1192 1193 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1194 movi $t0.8b, #0 @ suppress further partial tag feed in 1195 1196 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 1197 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1198 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 1199 1200 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 1201 1202 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 1203 1204 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 1205 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 1206 1207 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 1208 1209 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 1210 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 1211 1212 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 1213 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 1214.L128_enc_blocks_more_than_2: @ blocks left > 2 1215 1216 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 1217 1218 rev64 $res0b, $res1b @ GHASH final-2 block 1219 1220 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1221 1222 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 1223 1224 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 1225 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1226 ext $h3.16b, $h3.16b, $h3.16b, #8 1227 movi $t0.8b, #0 @ suppress further partial tag feed in 1228 1229 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 1230 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result 1231 1232 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 1233 1234 pmull $rk3q1, $res0.1d, 
$h3.1d @ GHASH final-2 block - low 1235 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 1236 1237 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 1238 1239 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 1240 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 1241.L128_enc_blocks_more_than_1: @ blocks left > 1 1242 1243 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 1244 1245 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1246 ext $h2.16b, $h2.16b, $h2.16b, #8 1247 rev64 $res0b, $res1b @ GHASH final-1 block 1248 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 1249 1250 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1251 1252 movi $t0.8b, #0 @ suppress further partial tag feed in 1253 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 1254 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 1255 1256 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 1257 1258 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 1259 1260 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1261 1262 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 1263 1264 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 1265 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 1266 1267 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 1268 1269 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 1270 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 1271.L128_enc_blocks_less_than_1: @ blocks left <= 1 1272 1273 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 1274 str $rtmp_ctrq, [$counter] @ store the updated counter 1275 and $bit_length, $bit_length, #127 @ bit_length %= 128 1276 1277 sub $bit_length, $bit_length, #128 @ bit_length -= 128 1278 1279 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 1280 1281 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 1282 ld1 { $rk0}, [$output_ptr] @ load existing bytes where 
the possibly partial last block is to be stored 1283 and $bit_length, $bit_length, #127 @ bit_length %= 128 1284 1285 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 1286 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 1287 cmp $bit_length, #64 1288 1289 csel $temp2_x, $temp1_x, $temp0_x, lt 1290 csel $temp3_x, $temp0_x, xzr, lt 1291 1292 mov $ctr0.d[1], $temp3_x 1293 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 1294 1295 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 1296 1297 rev64 $res0b, $res1b @ GHASH final block 1298 1299 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 1300 st1 { $res1b}, [$output_ptr] @ store all 16B 1301 1302 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1303 1304 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 1305 1306 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 1307 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1308 ext $h1.16b, $h1.16b, $h1.16b, #8 1309 1310 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 1311 1312 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 1313 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 1314 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 1315 1316 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 1317 1318 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 1319 1320 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 1321 1322 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1323 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1324 1325 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 1326 1327 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 1328 1329 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1330 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1331 1332 eor3 $acc_lb, 
$acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 1333 ext $acc_lb, $acc_lb, $acc_lb, #8 1334 rev64 $acc_lb, $acc_lb 1335 st1 { $acc_l.16b }, [$current_tag] 1336 mov x0, $byte_length 1337 1338 ldp d10, d11, [sp, #16] 1339 ldp d12, d13, [sp, #32] 1340 ldp d14, d15, [sp, #48] 1341 ldp d8, d9, [sp], #80 1342 ret 1343 1344.L128_enc_ret: 1345 mov w0, #0x0 1346 ret 1347.size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel 1348___ 1349 1350######################################################################################### 1351# size_t unroll8_eor3_aes_gcm_dec_128_kernel(const uint8_t * ciphertext, 1352# uint64_t plaintext_length, /* length in bits */ 1353# uint8_t * plaintext, 1354# uint64_t *Xi, 1355# unsigned char ivec[16], 1356# const void *key); 1357# 1358$code.=<<___; 1359.global unroll8_eor3_aes_gcm_dec_128_kernel 1360.type unroll8_eor3_aes_gcm_dec_128_kernel,%function 1361.align 4 1362unroll8_eor3_aes_gcm_dec_128_kernel: 1363 AARCH64_VALID_CALL_TARGET 1364 cbz x1, .L128_dec_ret 1365 stp d8, d9, [sp, #-80]! 
1366 lsr $byte_length, $bit_length, #3 1367 mov $counter, x4 1368 mov $cc, x5 1369 stp d10, d11, [sp, #16] 1370 stp d12, d13, [sp, #32] 1371 stp d14, d15, [sp, #48] 1372 mov x5, #0xc200000000000000 1373 stp x5, xzr, [sp, #64] 1374 add $modulo_constant, sp, #64 1375 1376 mov $main_end_input_ptr, $byte_length 1377 ld1 { $ctr0b}, [$counter] @ CTR block 0 1378 1379 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1380 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 1381 1382 mov $constant_temp, #0x100000000 @ set up counter increment 1383 movi $rctr_inc.16b, #0x0 1384 mov $rctr_inc.d[1], $constant_temp 1385 ld1 { $acc_lb}, [$current_tag] 1386 ext $acc_lb, $acc_lb, $acc_lb, #8 1387 rev64 $acc_lb, $acc_lb 1388 1389 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 1390 1391 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 1392 1393 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 1394 1395 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 1396 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 1397 1398 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1399 1400 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 1401 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 1402 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 1403 1404 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 1405 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 1406 1407 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 1408 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 1409 1410 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 1411 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 1412 1413 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 1414 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 1415 1416 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 1417 1418 
rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 1419 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 1420 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 1421 1422 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 1423 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 1424 1425 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 1426 1427 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 1428 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 1429 1430 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 1431 1432 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1433 1434 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 1435 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 1436 1437 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 1438 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 1439 1440 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 1441 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 1442 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 1443 1444 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 1445 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 1446 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 1447 1448 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 1449 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 1450 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 1451 1452 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 1453 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 1454 1455 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1456 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 1457 1458 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 1459 aese $ctr7b, $rk3 \n aesmc $ctr7b, 
$ctr7b @ AES block 7 - round 3 1460 1461 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 1462 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 1463 1464 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 1465 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 1466 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 1467 1468 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 1469 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 1470 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 1471 1472 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 1473 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 1474 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 1475 1476 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 1477 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 1478 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 1479 1480 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 1481 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 1482 1483 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 1484 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 1485 1486 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 1487 1488 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 1489 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 1490 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 1491 1492 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 1493 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 1494 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 1495 1496 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 1497 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 1498 aese $ctr6b, 
$rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 1499 1500 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 1501 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 1502 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 1503 1504 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 1505 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 1506 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 1507 1508 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 1509 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 1510 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 1511 1512 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 1513 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 1514 1515 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 1516 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 1517 1518 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 1519 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 1520 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 1521 1522 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 1523 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 1524 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 1525 1526 aese $ctr0b, $rk9 @ AES block 0 - round 9 1527 aese $ctr1b, $rk9 @ AES block 1 - round 9 1528 aese $ctr6b, $rk9 @ AES block 6 - round 9 1529 1530 ldr $rk10q, [$cc, #160] @ load rk10 1531 aese $ctr4b, $rk9 @ AES block 4 - round 9 1532 aese $ctr3b, $rk9 @ AES block 3 - round 9 1533 1534 aese $ctr2b, $rk9 @ AES block 2 - round 9 1535 aese $ctr5b, $rk9 @ AES block 5 - round 9 1536 aese $ctr7b, $rk9 @ AES block 7 - round 9 1537 1538 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 1539 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 1540 b.ge 
.L128_dec_tail @ handle tail 1541 1542 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 1543 1544 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 0 - result 1545 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 1 - result 1546 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result 1547 1548 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 1549 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 1550 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 1551 1552 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 1553 1554 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 1555 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 1556 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 1557 1558 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 3 - result 1559 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 2 - result 1560 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 1561 1562 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 1563 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 1564 1565 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 6 - result 1566 1567 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 1568 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 1569 1570 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 4 - result 1571 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 5 - result 1572 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 1573 1574 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 7 - result 1575 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 1576 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 1577 1578 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 1579 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 1580 b.ge .L128_dec_prepretail @ do prepretail 1581 1582.L128_dec_main_loop: @ main loop start 1583 ldr $h7q, [$current_tag, #176] @ load h7l 
| h7h 1584 ext $h7.16b, $h7.16b, $h7.16b, #8 1585 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 1586 ext $h8.16b, $h8.16b, $h8.16b, #8 1587 1588 rev64 $res1b, $res1b @ GHASH block 8k+1 1589 rev64 $res0b, $res0b @ GHASH block 8k 1590 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1591 1592 rev64 $res6b, $res6b @ GHASH block 8k+6 1593 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 1594 ext $h5.16b, $h5.16b, $h5.16b, #8 1595 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 1596 ext $h6.16b, $h6.16b, $h6.16b, #8 1597 1598 eor $res0b, $res0b, $acc_lb @ PRE 1 1599 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 1600 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 1601 1602 rev64 $res2b, $res2b @ GHASH block 8k+2 1603 rev64 $res4b, $res4b @ GHASH block 8k+4 1604 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1605 1606 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 1607 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 1608 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 1609 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 1610 1611 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 1612 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 1613 rev64 $res3b, $res3b @ GHASH block 8k+3 1614 1615 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 1616 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1617 rev64 $res5b, $res5b @ GHASH block 8k+5 1618 1619 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 1620 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 1621 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1622 1623 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 1624 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 1625 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 1626 1627 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 1628 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 1629 aese $ctr7b, $rk0 \n aesmc 
$ctr7b, $ctr7b @ AES block 8k+15 - round 0 1630 1631 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 1632 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 1633 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 1634 1635 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 1636 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 1637 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 1638 1639 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 1640 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 1641 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 1642 1643 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1644 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1645 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 1646 1647 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 1648 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1649 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 1650 1651 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1652 ext $h3.16b, $h3.16b, $h3.16b, #8 1653 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1654 ext $h4.16b, $h4.16b, $h4.16b, #8 1655 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 1656 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 1657 1658 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 1659 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 1660 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 1661 1662 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 1663 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 1664 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 1665 1666 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 1667 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ 
AES block 8k+10 - round 2 1668 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 1669 1670 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 1671 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1672 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1673 ext $h1.16b, $h1.16b, $h1.16b, #8 1674 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1675 ext $h2.16b, $h2.16b, $h2.16b, #8 1676 1677 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 1678 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 1679 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 1680 1681 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1682 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 1683 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 1684 1685 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 1686 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 1687 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 1688 1689 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 1690 rev64 $res7b, $res7b @ GHASH block 8k+7 1691 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 1692 1693 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1694 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 1695 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1696 1697 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1698 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1699 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 1700 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1701 1702 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 1703 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 1704 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 1705 1706 aese $ctr0b, $rk3 \n aesmc $ctr0b, 
$ctr0b @ AES block 8k+8 - round 3 1707 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 1708 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 1709 1710 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 1711 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 1712 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 1713 1714 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 1715 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 1716 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 1717 1718 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1719 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1720 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 1721 1722 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 1723 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 1724 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 1725 1726 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 1727 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 1728 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1729 1730 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 1731 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 1732 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 1733 1734 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 1735 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 1736 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 1737 1738 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 1739 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 1740 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 1741 1742 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 1743 aese $ctr5b, 
$rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 1744 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 1745 1746 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 1747 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1748 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 1749 1750 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 1751 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 1752 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 1753 1754 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 1755 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 1756 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 1757 1758 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 1759 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 1760 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 1761 1762 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 1763 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 1764 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 1765 1766 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 1767 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 1768 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 1769 1770 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 1771 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 1772 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 1773 1774 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 1775 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 1776 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 1777 1778 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 1779 aese $ctr3b, $rk7 \n aesmc $ctr3b, 
$ctr3b @ AES block 8k+11 - round 7 1780 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 1781 1782 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 1783 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 1784 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 1785 1786 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 1787 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1788 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1789 1790 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 1791 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 1792 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 1793 1794 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 1795 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 1796 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 1797 1798 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext 1799 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 1800 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 1801 1802 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext 1803 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 1804 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 1805 1806 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 1807 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 1808 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 1809 1810 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 1811 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 1812 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 1813 1814 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 1815 aese $ctr1b, 
$rk9 @ AES block 8k+9 - round 9 1816 ldr $rk10q, [$cc, #160] @ load rk10 1817 1818 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 1819 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1820 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 1821 1822 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 1823 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 1824 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1825 1826 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 1827 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 1828 1829 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 1830 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 1831 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 8k+9 - result 1832 1833 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 8k+8 - result 1834 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 8k+15 - result 1835 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 8k+14 - result 1836 1837 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 8k+10 - result 1838 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 1839 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 1840 1841 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 8k+12 - result 1842 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 1843 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 1844 1845 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 8k+11 - result 1846 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 1847 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 1848 1849 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 8k+13 - result 1850 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 1851 1852 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 1853 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 1854 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 1855 1856 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 1857 mov $ctr3.16b, $h4.16b @ CTR 
block 8k+19 1858 b.lt .L128_dec_main_loop 1859 1860.L128_dec_prepretail: @ PREPRETAIL 1861 rev64 $res3b, $res3b @ GHASH block 8k+3 1862 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1863 rev64 $res0b, $res0b @ GHASH block 8k 1864 1865 rev64 $res2b, $res2b @ GHASH block 8k+2 1866 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 1867 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1868 1869 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 1870 ext $h7.16b, $h7.16b, $h7.16b, #8 1871 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 1872 ext $h8.16b, $h8.16b, $h8.16b, #8 1873 eor $res0b, $res0b, $acc_lb @ PRE 1 1874 rev64 $res1b, $res1b @ GHASH block 8k+1 1875 1876 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 1877 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 1878 ext $h5.16b, $h5.16b, $h5.16b, #8 1879 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 1880 ext $h6.16b, $h6.16b, $h6.16b, #8 1881 rev64 $res5b, $res5b @ GHASH block 8k+5 1882 1883 rev64 $res4b, $res4b @ GHASH block 8k+4 1884 1885 rev64 $res6b, $res6b @ GHASH block 8k+6 1886 1887 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 1888 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 1889 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 1890 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 1891 1892 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 1893 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 1894 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 1895 1896 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1897 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1898 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 1899 1900 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 1901 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 1902 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 1903 1904 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 1905 aese $ctr4b, $rk0 \n aesmc $ctr4b, 
$ctr4b @ AES block 8k+12 - round 0 1906 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 1907 1908 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 1909 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 1910 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 1911 1912 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 1913 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1914 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1915 1916 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 1917 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 1918 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 1919 1920 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 1921 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 1922 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 1923 1924 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 1925 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 1926 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 1927 1928 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 1929 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1930 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 1931 1932 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 1933 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 1934 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 1935 1936 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1937 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 1938 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 1939 1940 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1941 ext $h3.16b, $h3.16b, $h3.16b, #8 1942 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1943 ext $h4.16b, $h4.16b, $h4.16b, #8 1944 aese 
$ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 1945 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 1946 1947 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 1948 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 1949 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 1950 1951 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1952 ext $h1.16b, $h1.16b, $h1.16b, #8 1953 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1954 ext $h2.16b, $h2.16b, $h2.16b, #8 1955 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1956 1957 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 1958 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 1959 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 1960 1961 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 1962 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1963 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 1964 1965 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 1966 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 1967 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 1968 1969 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 1970 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 1971 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1972 1973 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1974 rev64 $res7b, $res7b @ GHASH block 8k+7 1975 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 1976 1977 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1978 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1979 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 1980 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 1981 1982 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 1983 
aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 1984 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1985 1986 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 1987 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 1988 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1989 1990 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 1991 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 1992 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 1993 1994 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 1995 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 1996 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1997 1998 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 1999 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 2000 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 2001 2002 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2003 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 2004 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 2005 2006 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 2007 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 2008 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 2009 2010 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 2011 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 2012 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 2013 2014 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 2015 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 2016 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 2017 2018 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2019 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 
8k+4, 8k+5 - mid 2020 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 2021 2022 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2023 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 2024 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 2025 2026 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 2027 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 2028 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 2029 2030 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 2031 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 2032 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 2033 2034 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 2035 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2036 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 2037 2038 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 2039 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 2040 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 2041 2042 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 2043 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 2044 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 2045 2046 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 2047 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 2048 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 2049 2050 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 2051 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2052 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2053 2054 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2055 aese 
$ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 2056 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2057 2058 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 2059 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 2060 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 2061 2062 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 2063 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 2064 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 2065 2066 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 2067 ldr $rk10q, [$cc, #160] @ load rk10 2068 2069 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 2070 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 2071 2072 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2073 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 2074 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 2075 2076 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 2077 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 2078 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 2079 2080 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 2081 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 2082 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 2083 2084 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 2085 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 2086 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 2087 2088 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 2089 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 2090 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 2091 2092 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 2093 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 2094 
aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 2095 2096.L128_dec_tail: @ TAIL 2097 2098 mov $t1.16b, $rk10 2099 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 2100 2101 cmp $main_end_input_ptr, #112 2102 2103 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 2104 ext $h8.16b, $h8.16b, $h8.16b, #8 2105 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 2106 2107 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 2108 ext $h5.16b, $h5.16b, $h5.16b, #8 2109 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 2110 2111 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 2112 ext $h6.16b, $h6.16b, $h6.16b, #8 2113 ext $h7.16b, $h7.16b, $h7.16b, #8 2114 2115 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 2116 b.gt .L128_dec_blocks_more_than_7 2117 2118 cmp $main_end_input_ptr, #96 2119 mov $ctr7b, $ctr6b 2120 movi $acc_l.8b, #0 2121 2122 movi $acc_h.8b, #0 2123 mov $ctr6b, $ctr5b 2124 mov $ctr5b, $ctr4b 2125 2126 mov $ctr4b, $ctr3b 2127 mov $ctr3b, $ctr2b 2128 mov $ctr2b, $ctr1b 2129 2130 movi $acc_m.8b, #0 2131 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2132 b.gt .L128_dec_blocks_more_than_6 2133 2134 cmp $main_end_input_ptr, #80 2135 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2136 2137 mov $ctr7b, $ctr6b 2138 mov $ctr6b, $ctr5b 2139 mov $ctr5b, $ctr4b 2140 2141 mov $ctr4b, $ctr3b 2142 mov $ctr3b, $ctr1b 2143 b.gt .L128_dec_blocks_more_than_5 2144 2145 cmp $main_end_input_ptr, #64 2146 2147 mov $ctr7b, $ctr6b 2148 mov $ctr6b, $ctr5b 2149 mov $ctr5b, $ctr4b 2150 2151 mov $ctr4b, $ctr1b 2152 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2153 b.gt .L128_dec_blocks_more_than_4 2154 2155 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2156 mov $ctr7b, $ctr6b 2157 mov $ctr6b, $ctr5b 2158 2159 mov $ctr5b, $ctr1b 2160 cmp $main_end_input_ptr, #48 2161 b.gt .L128_dec_blocks_more_than_3 2162 2163 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2164 mov $ctr7b, $ctr6b 
2165 cmp $main_end_input_ptr, #32 2166 2167 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2168 mov $ctr6b, $ctr1b 2169 b.gt .L128_dec_blocks_more_than_2 2170 2171 cmp $main_end_input_ptr, #16 2172 2173 mov $ctr7b, $ctr1b 2174 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2175 b.gt L128_dec_blocks_more_than_1 2176 2177 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2178 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2179 b .L128_dec_blocks_less_than_1 2180.L128_dec_blocks_more_than_7: @ blocks left > 7 2181 rev64 $res0b, $res1b @ GHASH final-7 block 2182 2183 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2184 2185 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 2186 2187 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 2188 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 2189 2190 movi $t0.8b, #0 @ suppress further partial tag feed in 2191 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 2192 2193 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 2194 2195 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 2196 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 2197 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 2198 2199 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 2200.L128_dec_blocks_more_than_6: @ blocks left > 6 2201 2202 rev64 $res0b, $res1b @ GHASH final-6 block 2203 2204 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2205 2206 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 2207 2208 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 2209 2210 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 2211 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 2212 movi $t0.8b, #0 @ suppress further partial tag feed in 2213 2214 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 2215 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 2216 pmull2 $rk2q1, $res0.2d, 
$h7.2d @ GHASH final-6 block - high 2217 2218 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 2219 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 2220 2221 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 2222 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 2223.L128_dec_blocks_more_than_5: @ blocks left > 5 2224 2225 rev64 $res0b, $res1b @ GHASH final-5 block 2226 2227 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 2228 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 2229 2230 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2231 2232 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 2233 2234 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 2235 2236 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 2237 2238 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 2239 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 2240 movi $t0.8b, #0 @ suppress further partial tag feed in 2241 2242 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 2243 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 2244 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 2245 2246 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 2247 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 2248.L128_dec_blocks_more_than_4: @ blocks left > 4 2249 2250 rev64 $res0b, $res1b @ GHASH final-4 block 2251 2252 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2253 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 2254 2255 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 2256 movi $t0.8b, #0 @ suppress further partial tag feed in 2257 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 2258 2259 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 2260 2261 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 2262 2263 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 2264 
eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 2265 2266 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 2267 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 2268 2269 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 2270 2271 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 2272.L128_dec_blocks_more_than_3: @ blocks left > 3 2273 2274 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 2275 rev64 $res0b, $res1b @ GHASH final-3 block 2276 2277 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2278 2279 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 2280 2281 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 2282 ext $h4.16b, $h4.16b, $h4.16b, #8 2283 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2284 2285 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 2286 2287 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 2288 2289 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 2290 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 2291 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 2292 2293 movi $t0.8b, #0 @ suppress further partial tag feed in 2294 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 2295 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 2296 2297 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 2298 2299 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 2300 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 2301.L128_dec_blocks_more_than_2: @ blocks left > 2 2302 2303 rev64 $res0b, $res1b @ GHASH final-2 block 2304 2305 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 2306 2307 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2308 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 2309 ext $h3.16b, $h3.16b, $h3.16b, #8 2310 movi $t0.8b, #0 @ suppress further partial tag feed in 2311 2312 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 
2313 2314 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 2315 2316 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 2317 2318 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 2319 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 2320 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 2321 2322 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 2323 2324 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 2325 2326 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 2327 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 2328.L128_dec_blocks_more_than_1: @ blocks left > 1 2329 2330 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 2331 rev64 $res0b, $res1b @ GHASH final-1 block 2332 2333 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 2334 ext $h2.16b, $h2.16b, $h2.16b, #8 2335 2336 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2337 2338 movi $t0.8b, #0 @ suppress further partial tag feed in 2339 2340 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 2341 2342 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 2343 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 2344 2345 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 2346 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 2347 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2348 2349 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 2350 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 2351 2352 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 2353 2354 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 2355 2356 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 2357 2358 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 2359.L128_dec_blocks_less_than_1: @ blocks left <= 1 2360 2361 and $bit_length, $bit_length, #127 @ bit_length %= 128 2362 2363 sub $bit_length, $bit_length, #128 @ 
bit_length -= 128 2364 2365 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 2366 2367 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 2368 and $bit_length, $bit_length, #127 @ bit_length %= 128 2369 2370 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 2371 cmp $bit_length, #64 2372 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 2373 2374 csel $temp2_x, $temp1_x, $temp0_x, lt 2375 csel $temp3_x, $temp0_x, xzr, lt 2376 2377 mov $ctr0.d[1], $temp3_x 2378 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 2379 2380 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 2381 ext $h1.16b, $h1.16b, $h1.16b, #8 2382 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 2383 2384 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 2385 2386 rev64 $res0b, $res1b @ GHASH final block 2387 2388 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2389 2390 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 2391 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 2392 2393 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 2394 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 2395 2396 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 2397 2398 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 2399 st1 { $res4b}, [$output_ptr] @ store all 16B 2400 2401 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 2402 2403 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 2404 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2405 2406 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 2407 2408 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2409 2410 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2411 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2412 2413 eor $acc_mb, $acc_mb, $t10.16b @ 
MODULO - karatsuba tidy up 2414 2415 eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid 2416 2417 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2418 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 2419 2420 eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low 2421 ext $acc_lb, $acc_lb, $acc_lb, #8 2422 rev64 $acc_lb, $acc_lb 2423 st1 { $acc_l.16b }, [$current_tag] 2424 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 2425 2426 str $rtmp_ctrq, [$counter] @ store the updated counter 2427 2428 mov x0, $byte_length 2429 2430 ldp d10, d11, [sp, #16] 2431 ldp d12, d13, [sp, #32] 2432 ldp d14, d15, [sp, #48] 2433 ldp d8, d9, [sp], #80 2434 ret 2435.L128_dec_ret: 2436 mov w0, #0x0 2437 ret 2438.size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel 2439___ 2440} 2441 2442{ 2443my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7)); 2444my ($temp2_x,$temp3_x)=map("x$_",(13..14)); 2445my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15)); 2446my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15)); 2447my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7)); 2448my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7)); 2449my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15)); 2450 2451my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15)); 2452my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15)); 2453my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15)); 2454 2455my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19)); 2456my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19)); 2457 2458my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25)); 
2459my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25)); 2460my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25)); 2461my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25)); 2462 2463my $t0="v16"; 2464my $t0d="d16"; 2465 2466my $t1="v29"; 2467my $t2=$res1; 2468my $t3=$t1; 2469 2470my $t4=$res0; 2471my $t5=$res2; 2472my $t6=$t0; 2473 2474my $t7=$res3; 2475my $t8=$res4; 2476my $t9=$res5; 2477 2478my $t10=$res6; 2479my $t11="v21"; 2480my $t12=$t1; 2481 2482my $rtmp_ctr="v30"; 2483my $rtmp_ctrq="q30"; 2484my $rctr_inc="v31"; 2485my $rctr_incd="d31"; 2486 2487my $mod_constantd=$t0d; 2488my $mod_constant=$t0; 2489 2490my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28)); 2491my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28)); 2492my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28)); 2493my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28)); 2494my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28)); 2495my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28)); 2496my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28)); 2497my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28)); 2498my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28)); 2499my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28)); 2500my $rk2q1="v28.1q"; 2501my $rk3q1="v26.1q"; 2502my $rk4v="v27"; 2503 2504######################################################################################### 2505# size_t unroll8_eor3_aes_gcm_enc_192_kernel(const uint8_t * plaintext, 2506# uint64_t plaintext_length, 2507# uint8_t * ciphertext, 2508# uint64_t *Xi, 2509# unsigned char ivec[16], 2510# const void *key); 2511# 2512$code.=<<___; 2513.global unroll8_eor3_aes_gcm_enc_192_kernel 2514.type unroll8_eor3_aes_gcm_enc_192_kernel,%function 2515.align 4 2516unroll8_eor3_aes_gcm_enc_192_kernel: 2517 AARCH64_VALID_CALL_TARGET 2518 cbz x1, .L192_enc_ret 2519 stp d8, d9, [sp, #-80]! 
2520 lsr $byte_length, $bit_length, #3 2521 mov $counter, x4 2522 mov $cc, x5 2523 stp d10, d11, [sp, #16] 2524 stp d12, d13, [sp, #32] 2525 stp d14, d15, [sp, #48] 2526 mov x5, #0xc200000000000000 2527 stp x5, xzr, [sp, #64] 2528 add $modulo_constant, sp, #64 2529 2530 mov $main_end_input_ptr, $byte_length 2531 ld1 { $ctr0b}, [$counter] @ CTR block 0 2532 2533 mov $constant_temp, #0x100000000 @ set up counter increment 2534 movi $rctr_inc.16b, #0x0 2535 mov $rctr_inc.d[1], $constant_temp 2536 2537 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 2538 2539 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 2540 2541 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 2542 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 2543 2544 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 2545 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 2546 2547 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 2548 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 2549 2550 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 2551 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 2552 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 2553 2554 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2555 2556 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 2557 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 2558 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 2559 2560 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 2561 2562 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 2563 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 2564 2565 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 2566 2567 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 2568 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 2569 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 2570 2571 aese $ctr0b, $rk0 \n 
aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 2572 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 2573 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 2574 2575 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 2576 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 2577 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 2578 2579 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 2580 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 2581 2582 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 2583 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 2584 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 2585 2586 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 2587 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 2588 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 2589 2590 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 2591 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 2592 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 2593 2594 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 2595 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 2596 2597 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 2598 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 2599 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 2600 2601 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 2602 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 2603 2604 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 2605 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 2606 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 2607 2608 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 2609 2610 aese $ctr0b, $rk3 
\n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 2611 2612 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 2613 2614 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 2615 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 2616 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 2617 2618 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 2619 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 2620 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 2621 2622 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 2623 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 2624 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 2625 2626 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 2627 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2628 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 2629 2630 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 2631 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 2632 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 2633 2634 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 2635 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 2636 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 2637 2638 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 2639 2640 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 2641 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 2642 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 2643 2644 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 2645 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 2646 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 2647 2648 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 2649 
aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 2650 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2651 2652 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 2653 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 2654 2655 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 2656 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 2657 2658 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 2659 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 2660 2661 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 2662 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 2663 2664 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 2665 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 2666 2667 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 2668 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 2669 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 2670 2671 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 2672 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 2673 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 2674 2675 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 2676 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 2677 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 2678 2679 ld1 { $acc_lb}, [$current_tag] 2680 ext $acc_lb, $acc_lb, $acc_lb, #8 2681 rev64 $acc_lb, $acc_lb 2682 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 2683 2684 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 2685 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 2686 2687 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 2688 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 2689 2690 aese $ctr0b, $rk9 \n 
aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 2691 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 2692 2693 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 14 - round 10 2694 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 2695 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 11 - round 10 2696 2697 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 9 - round 10 2698 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 13 - round 10 2699 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 12 - round 10 2700 2701 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8 - round 10 2702 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 10 - round 10 2703 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 15 - round 10 2704 2705 aese $ctr6b, $rk11 @ AES block 14 - round 11 2706 aese $ctr3b, $rk11 @ AES block 11 - round 11 2707 2708 aese $ctr4b, $rk11 @ AES block 12 - round 11 2709 aese $ctr7b, $rk11 @ AES block 15 - round 11 2710 ldr $rk12q, [$cc, #192] @ load rk12 2711 2712 aese $ctr1b, $rk11 @ AES block 9 - round 11 2713 aese $ctr5b, $rk11 @ AES block 13 - round 11 2714 2715 aese $ctr2b, $rk11 @ AES block 10 - round 11 2716 aese $ctr0b, $rk11 @ AES block 8 - round 11 2717 b.ge .L192_enc_tail @ handle tail 2718 2719 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 2720 2721 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext 2722 2723 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 2724 2725 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 2726 2727 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 0 - result 2728 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 2729 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 2730 2731 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 3 - result 2732 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 1 - result 2733 2734 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR 
block 9 2735 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 2736 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 4 - result 2737 2738 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 5 - result 2739 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 7 - result 2740 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 2741 2742 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 2 - result 2743 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 2744 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 2745 2746 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 2747 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 2748 2749 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 2750 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 2751 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 6 - result 2752 2753 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 2754 2755 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 2756 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 2757 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 2758 2759 b.ge .L192_enc_prepretail @ do prepretail 2760 2761.L192_enc_main_loop: @ main loop start 2762 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 2763 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 2764 rev64 $res2b, $res2b @ GHASH block 8k+2 2765 2766 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 2767 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 2768 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 2769 ext $h7.16b, $h7.16b, $h7.16b, #8 2770 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 2771 ext $h8.16b, $h8.16b, $h8.16b, #8 2772 2773 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 2774 rev64 $res0b, $res0b @ GHASH block 8k 2775 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 2776 ext $h5.16b, $h5.16b, $h5.16b, #8 2777 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 2778 ext $h6.16b, $h6.16b, $h6.16b, 
#8 2779 2780 rev64 $res1b, $res1b @ GHASH block 8k+1 2781 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 2782 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 2783 2784 eor $res0b, $res0b, $acc_lb @ PRE 1 2785 rev64 $res3b, $res3b @ GHASH block 8k+3 2786 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 2787 2788 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 2789 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 2790 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 2791 2792 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 2793 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 2794 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 2795 2796 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 2797 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 2798 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 2799 2800 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 2801 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 2802 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 2803 2804 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 2805 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 2806 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 2807 2808 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 2809 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 2810 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 2811 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 2812 2813 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 2814 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 2815 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 2816 2817 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 2818 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 
8k+10 - round 1 2819 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 2820 2821 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 2822 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 2823 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 2824 2825 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 2826 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 2827 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 2828 2829 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 2830 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 2831 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 2832 2833 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 2834 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 2835 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 2836 2837 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 2838 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 2839 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 2840 2841 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 2842 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 2843 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 2844 2845 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 2846 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 2847 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 2848 2849 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 2850 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 2851 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 2852 ext $h3.16b, $h3.16b, $h3.16b, #8 2853 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 2854 ext $h4.16b, $h4.16b, $h4.16b, #8 2855 2856 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH 
block 8k - mid 2857 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 2858 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 2859 2860 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 2861 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 2862 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 2863 2864 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 2865 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 2866 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 2867 2868 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 2869 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 2870 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 2871 2872 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 2873 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 2874 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 2875 2876 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 2877 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 2878 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 2879 2880 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 2881 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 2882 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 2883 2884 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 2885 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 2886 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 2887 ext $h1.16b, $h1.16b, $h1.16b, #8 2888 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 2889 ext $h2.16b, $h2.16b, $h2.16b, #8 2890 2891 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2892 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 2893 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, 
t2 and t3 free) 2894 2895 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 2896 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 2897 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 2898 2899 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 2900 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 2901 2902 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 2903 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2904 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2905 2906 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 2907 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 2908 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 2909 2910 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 2911 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 2912 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 2913 2914 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 2915 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 2916 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 2917 2918 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 2919 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 2920 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 2921 2922 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 2923 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 2924 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 2925 2926 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 2927 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 2928 2929 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 2930 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 2931 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 
8k+9 - round 6 2932 2933 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 2934 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2935 2936 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 2937 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2938 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 2939 2940 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 2941 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 2942 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 2943 2944 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 2945 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 2946 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 2947 2948 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2949 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 2950 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 2951 2952 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 2953 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 2954 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 2955 2956 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 2957 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 2958 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 2959 2960 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 2961 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 2962 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 2963 2964 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 2965 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 2966 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 2967 2968 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 2969 aese 
$ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 2970 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 2971 2972 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 2973 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 2974 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 2975 2976 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 2977 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2978 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 2979 2980 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 2981 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 2982 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 2983 2984 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2985 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 2986 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 2987 2988 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 2989 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 2990 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 2991 2992 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2993 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 2994 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 2995 2996 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 2997 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 2998 ldr $rk12q, [$cc, #192] @ load rk12 2999 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3000 3001 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 3002 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 3003 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 3004 3005 aese 
$ctr4b, $rk11 @ AES block 8k+12 - round 11 3006 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3007 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext 3008 3009 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext 3010 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 3011 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 3012 3013 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 3014 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 3015 3016 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 3017 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3018 3019 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 3020 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 3021 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 3022 3023 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 3024 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 3025 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 4 - result 3026 3027 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 3028 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 3029 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 3030 3031 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 3032 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 3033 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 7 - result 3034 3035 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 8k+10 - result 3036 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 8k+8 - result 3037 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 3038 3039 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 8k+9 - result 3040 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 3041 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 3042 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3043 3044 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 6 - result 3045 mov 
$ctr0.16b, $h1.16b @ CTR block 8k+16 3046 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 3047 3048 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 3049 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 5 - result 3050 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 3051 3052 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 8k+11 - result 3053 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 3054 3055 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 3056 3057 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 3058 3059 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 3060 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 3061 b.lt .L192_enc_main_loop 3062 3063.L192_enc_prepretail: @ PREPRETAIL 3064 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 3065 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3066 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 3067 3068 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 3069 ext $h7.16b, $h7.16b, $h7.16b, #8 3070 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 3071 ext $h8.16b, $h8.16b, $h8.16b, #8 3072 rev64 $res0b, $res0b @ GHASH block 8k 3073 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 3074 3075 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 3076 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 3077 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 3078 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 3079 3080 rev64 $res3b, $res3b @ GHASH block 8k+3 3081 rev64 $res2b, $res2b @ GHASH block 8k+2 3082 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 3083 ext $h5.16b, $h5.16b, $h5.16b, #8 3084 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 3085 ext $h6.16b, $h6.16b, $h6.16b, #8 3086 3087 eor $res0b, $res0b, $acc_lb @ PRE 1 3088 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 3089 rev64 $res1b, $res1b @ GHASH block 8k+1 3090 3091 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - 
round 0 3092 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 3093 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 3094 3095 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 3096 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 3097 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 3098 3099 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 3100 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 3101 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 3102 3103 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 3104 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 3105 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3106 3107 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3108 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 3109 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3110 3111 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 3112 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 3113 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 3114 3115 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 3116 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 3117 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 3118 3119 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 3120 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 3121 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 3122 3123 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 3124 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 3125 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 3126 3127 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 3128 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - 
round 2 3129 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 3130 3131 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 3132 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 3133 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 3134 3135 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 3136 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 3137 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 3138 3139 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 3140 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 3141 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 3142 3143 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 3144 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 3145 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 3146 3147 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 3148 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 3149 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 3150 3151 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 3152 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 3153 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 3154 3155 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 3156 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 3157 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 3158 3159 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 3160 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 3161 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 3162 3163 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 3164 ext $h3.16b, $h3.16b, $h3.16b, #8 3165 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 3166 ext $h4.16b, $h4.16b, $h4.16b, #8 3167 
aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 3168 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 3169 3170 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 3171 ext $h1.16b, $h1.16b, $h1.16b, #8 3172 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 3173 ext $h2.16b, $h2.16b, $h2.16b, #8 3174 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 3175 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 3176 3177 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 3178 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 3179 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 3180 3181 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 3182 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 3183 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 3184 3185 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 3186 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 3187 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 3188 3189 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 3190 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 3191 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 3192 3193 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 3194 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 3195 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3196 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3197 3198 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 3199 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 3200 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 3201 3202 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 3203 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 3204 pmull $h4.1q, $res4.1d, $h4.1d @ 
GHASH block 8k+4 - low 3205 3206 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 3207 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 3208 3209 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 3210 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 3211 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 3212 3213 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 3214 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 3215 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 3216 3217 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 3218 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 3219 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 3220 3221 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 3222 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 3223 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 3224 3225 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 3226 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 3227 3228 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 3229 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 3230 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 3231 3232 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 3233 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 3234 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 3235 3236 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 3237 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 3238 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 3239 3240 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 3241 ldr $mod_constantd, [$modulo_constant] @ MODULO - load 
modulo constant 3242 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 3243 3244 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 3245 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 3246 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 3247 3248 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 3249 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 3250 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 3251 3252 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 3253 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 3254 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 3255 3256 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 3257 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 3258 3259 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 3260 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 3261 3262 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 3263 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3264 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 3265 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3266 3267 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 3268 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 3269 3270 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 3271 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 3272 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3273 3274 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 3275 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 3276 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 3277 3278 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ 
AES block 8k+8 - round 8 3279 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 3280 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 3281 3282 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 3283 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 3284 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 3285 3286 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 3287 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 3288 3289 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3290 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 3291 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 3292 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 3293 3294 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3295 ldr $rk12q, [$cc, #192] @ load rk12 3296 3297 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 3298 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 3299 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 3300 3301 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 3302 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 3303 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 3304 3305 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 3306 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 3307 3308 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 3309 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 3310 3311 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 3312 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 3313 3314 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 3315 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 3316 aese $ctr0b, $rk11 @ AES block 
8k+8 - round 11 3317 3318 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 3319 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 3320 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 3321 3322.L192_enc_tail: @ TAIL 3323 3324 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 3325 ext $h5.16b, $h5.16b, $h5.16b, #8 3326 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 3327 3328 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext 3329 3330 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 3331 ext $h8.16b, $h8.16b, $h8.16b, #8 3332 3333 mov $t1.16b, $rk12 3334 3335 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 3336 ext $h6.16b, $h6.16b, $h6.16b, #8 3337 ext $h7.16b, $h7.16b, $h7.16b, #8 3338 cmp $main_end_input_ptr, #112 3339 3340 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result 3341 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 3342 b.gt .L192_enc_blocks_more_than_7 3343 3344 cmp $main_end_input_ptr, #96 3345 mov $ctr7b, $ctr6b 3346 movi $acc_h.8b, #0 3347 3348 mov $ctr6b, $ctr5b 3349 movi $acc_l.8b, #0 3350 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3351 3352 mov $ctr5b, $ctr4b 3353 mov $ctr4b, $ctr3b 3354 mov $ctr3b, $ctr2b 3355 3356 mov $ctr2b, $ctr1b 3357 movi $acc_m.8b, #0 3358 b.gt .L192_enc_blocks_more_than_6 3359 3360 mov $ctr7b, $ctr6b 3361 cmp $main_end_input_ptr, #80 3362 3363 mov $ctr6b, $ctr5b 3364 mov $ctr5b, $ctr4b 3365 mov $ctr4b, $ctr3b 3366 3367 mov $ctr3b, $ctr1b 3368 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3369 b.gt .L192_enc_blocks_more_than_5 3370 3371 cmp $main_end_input_ptr, #64 3372 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3373 3374 mov $ctr7b, $ctr6b 3375 mov $ctr6b, $ctr5b 3376 mov $ctr5b, $ctr4b 3377 3378 mov $ctr4b, $ctr1b 3379 b.gt .L192_enc_blocks_more_than_4 3380 3381 mov $ctr7b, $ctr6b 3382 mov $ctr6b, $ctr5b 3383 mov $ctr5b, $ctr1b 3384 3385 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3386 cmp
$main_end_input_ptr, #48 3387 b.gt .L192_enc_blocks_more_than_3 3388 3389 mov $ctr7b, $ctr6b 3390 mov $ctr6b, $ctr1b 3391 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3392 3393 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3394 cmp $main_end_input_ptr, #32 3395 b.gt .L192_enc_blocks_more_than_2 3396 3397 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3398 3399 cmp $main_end_input_ptr, #16 3400 mov $ctr7b, $ctr1b 3401 b.gt .L192_enc_blocks_more_than_1 3402 3403 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3404 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3405 b .L192_enc_blocks_less_than_1 3406.L192_enc_blocks_more_than_7: @ blocks left > 7 3407 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 3408 3409 rev64 $res0b, $res1b @ GHASH final-7 block 3410 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 3411 3412 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3413 3414 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 3415 3416 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 3417 3418 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 3419 movi $t0.8b, #0 @ suppress further partial tag feed in 3420 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 3421 3422 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 3423 3424 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 3425 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 3426.L192_enc_blocks_more_than_6: @ blocks left > 6 3427 3428 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 3429 3430 rev64 $res0b, $res1b @ GHASH final-6 block 3431 3432 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 3433 3434 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3435 3436 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 3437 3438 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 3439 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 3440 
3441 movi $t0.8b, #0 @ suppress further partial tag feed in 3442 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 3443 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 3444 3445 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 3446 3447 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 3448 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 3449 3450 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 3451.L192_enc_blocks_more_than_5: @ blocks left > 5 3452 3453 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result 3454 3455 rev64 $res0b, $res1b @ GHASH final-5 block 3456 3457 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3458 3459 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 3460 3461 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 3462 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 3463 3464 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 3465 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 3466 3467 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 3468 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 3469 3470 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 3471 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 3472 3473 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 3474 movi $t0.8b, #0 @ suppress further partial tag feed in 3475 3476 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 3477.L192_enc_blocks_more_than_4: @ blocks left > 4 3478 3479 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 3480 3481 rev64 $res0b, $res1b @ GHASH final-4 block 3482 3483 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3484 3485 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 3486 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 3487 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 3488 3489 pmull $rk3q1, 
$res0.1d, $h5.1d @ GHASH final-4 block - low 3490 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 3491 3492 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 3493 3494 movi $t0.8b, #0 @ suppress further partial tag feed in 3495 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 3496 3497 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 3498 3499 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 3500 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 3501.L192_enc_blocks_more_than_3: @ blocks left > 3 3502 3503 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3504 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 3505 3506 rev64 $res0b, $res1b @ GHASH final-3 block 3507 3508 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3509 movi $t0.8b, #0 @ suppress further partial tag feed in 3510 3511 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 3512 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 3513 ext $h4.16b, $h4.16b, $h4.16b, #8 3514 3515 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 3516 3517 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 3518 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 3519 3520 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 3521 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 3522 3523 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 3524 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 3525 3526 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 3527 3528 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 3529 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 3530.L192_enc_blocks_more_than_2: @ blocks left > 2 3531 3532 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 3533 3534 rev64 $res0b, $res1b @ GHASH final-2 block 3535 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 3536 ext $h3.16b, $h3.16b, $h3.16b, #8 
3537 3538 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3539 3540 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 3541 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 3542 3543 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 3544 3545 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 3546 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 3547 movi $t0.8b, #0 @ suppress further partial tag feed in 3548 3549 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 3550 3551 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 3552 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 3553 3554 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 3555 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result 3556.L192_enc_blocks_more_than_1: @ blocks left > 1 3557 3558 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 3559 ext $h2.16b, $h2.16b, $h2.16b, #8 3560 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 3561 3562 rev64 $res0b, $res1b @ GHASH final-1 block 3563 3564 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3565 3566 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 3567 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 3568 3569 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 3570 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 3571 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 3572 3573 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 3574 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3575 3576 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 3577 3578 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 3579 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 3580 3581 movi $t0.8b, #0 @ suppress further partial tag feed in 3582 3583 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 3584 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block -
high 3585.L192_enc_blocks_less_than_1: @ blocks left <= 1 3586 3587 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 3588 and $bit_length, $bit_length, #127 @ bit_length %= 128 3589 3590 sub $bit_length, $bit_length, #128 @ bit_length -= 128 3591 3592 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 3593 3594 and $bit_length, $bit_length, #127 @ bit_length %= 128 3595 3596 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 3597 cmp $bit_length, #64 3598 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 3599 3600 csel $temp2_x, $temp1_x, $temp0_x, lt 3601 csel $temp3_x, $temp0_x, xzr, lt 3602 3603 mov $ctr0.d[1], $temp3_x 3604 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 3605 ext $h1.16b, $h1.16b, $h1.16b, #8 3606 3607 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 3608 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 3609 3610 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 3611 3612 rev64 $res0b, $res1b @ GHASH final block 3613 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 3614 3615 st1 { $res1b}, [$output_ptr] @ store all 16B 3616 3617 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3618 3619 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 3620 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 3621 3622 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 3623 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 3624 3625 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 3626 3627 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 3628 3629 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 3630 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 3631 3632 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 3633 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3634 3635 rev32 $rtmp_ctr.16b, 
$rtmp_ctr.16b 3636 3637 str $rtmp_ctrq, [$counter] @ store the updated counter 3638 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 3639 3640 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3641 3642 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3643 3644 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3645 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3646 3647 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 3648 ext $acc_lb, $acc_lb, $acc_lb, #8 3649 rev64 $acc_lb, $acc_lb 3650 st1 { $acc_l.16b }, [$current_tag] 3651 3652 mov x0, $byte_length @ return sizes 3653 3654 ldp d10, d11, [sp, #16] 3655 ldp d12, d13, [sp, #32] 3656 ldp d14, d15, [sp, #48] 3657 ldp d8, d9, [sp], #80 3658 ret 3659 3660.L192_enc_ret: 3661 mov w0, #0x0 3662 ret 3663.size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel 3664___ 3665 3666######################################################################################### 3667# size_t unroll8_eor3_aes_gcm_dec_192_kernel(const uint8_t * ciphertext, 3668# uint64_t plaintext_length, 3669# uint8_t * plaintext, 3670# uint64_t *Xi, 3671# unsigned char ivec[16], 3672# const void *key); 3673# 3674$code.=<<___; 3675.global unroll8_eor3_aes_gcm_dec_192_kernel 3676.type unroll8_eor3_aes_gcm_dec_192_kernel,%function 3677.align 4 3678unroll8_eor3_aes_gcm_dec_192_kernel: 3679 AARCH64_VALID_CALL_TARGET 3680 cbz x1, .L192_dec_ret 3681 stp d8, d9, [sp, #-80]! 
3682 lsr $byte_length, $bit_length, #3 3683 mov $counter, x4 3684 mov $cc, x5 3685 stp d10, d11, [sp, #16] 3686 stp d12, d13, [sp, #32] 3687 stp d14, d15, [sp, #48] 3688 mov x5, #0xc200000000000000 3689 stp x5, xzr, [sp, #64] 3690 add $modulo_constant, sp, #64 3691 3692 mov $main_end_input_ptr, $byte_length 3693 ld1 { $ctr0b}, [$counter] @ CTR block 0 3694 ld1 { $acc_lb}, [$current_tag] 3695 3696 mov $constant_temp, #0x100000000 @ set up counter increment 3697 movi $rctr_inc.16b, #0x0 3698 mov $rctr_inc.d[1], $constant_temp 3699 3700 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 3701 3702 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 3703 3704 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 3705 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 3706 3707 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 3708 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 3709 3710 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 3711 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 3712 3713 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 3714 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 3715 3716 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 3717 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 3718 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3719 3720 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 3721 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 3722 3723 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 3724 3725 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 3726 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 3727 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 3728 3729 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 3730 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 3731 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 3732 3733 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 
3734 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 3735 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3736 3737 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 3738 3739 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 3740 3741 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 3742 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 3743 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 3744 3745 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 3746 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 3747 3748 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 3749 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 3750 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 3751 3752 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 3753 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 3754 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 3755 3756 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 3757 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 3758 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 3759 3760 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 3761 3762 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 3763 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 3764 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 3765 3766 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 3767 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 3768 3769 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 3770 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 3771 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 3772 3773 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - 
round 4 3774 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 3775 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 3776 3777 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 3778 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 3779 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 3780 3781 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 3782 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 3783 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 3784 3785 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 3786 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 3787 3788 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 3789 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 3790 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 3791 3792 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 3793 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 3794 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 3795 3796 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 3797 3798 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 3799 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 3800 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 3801 3802 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 3803 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 3804 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 3805 3806 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 3807 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 3808 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 3809 3810 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 3811 3812 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 
7 3813 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 3814 3815 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 3816 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 3817 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 3818 3819 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 3820 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 3821 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 3822 3823 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 3824 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 3825 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 3826 3827 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 3828 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 3829 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 3830 3831 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 3832 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 3833 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 3834 3835 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 3836 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 3837 3838 ld1 { $acc_lb}, [$current_tag] 3839 ext $acc_lb, $acc_lb, $acc_lb, #8 3840 rev64 $acc_lb, $acc_lb 3841 3842 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 3843 3844 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 3845 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 3846 3847 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 3848 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 3849 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 3850 3851 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 
blocks 3852 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 3853 3854 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 3855 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 3856 3857 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 3858 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 3859 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 3860 3861 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10 3862 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 3863 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 3864 3865 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 3866 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 3867 ldr $rk12q, [$cc, #192] @ load rk12 3868 3869 aese $ctr0b, $rk11 @ AES block 0 - round 11 3870 aese $ctr1b, $rk11 @ AES block 1 - round 11 3871 aese $ctr4b, $rk11 @ AES block 4 - round 11 3872 3873 aese $ctr6b, $rk11 @ AES block 6 - round 11 3874 aese $ctr5b, $rk11 @ AES block 5 - round 11 3875 aese $ctr7b, $rk11 @ AES block 7 - round 11 3876 3877 aese $ctr2b, $rk11 @ AES block 2 - round 11 3878 aese $ctr3b, $rk11 @ AES block 3 - round 11 3879 b.ge .L192_dec_tail @ handle tail 3880 3881 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 3882 3883 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 3884 3885 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 3886 3887 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 1 - result 3888 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 0 - result 3889 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result 3890 3891 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 3892 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 3893 3894 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 3895 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR 
block 9 3896 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 3 - result 3897 3898 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 2 - result 3899 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 3900 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 3901 3902 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 3903 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 3904 3905 eor3 $ctr4b, $res4b, $ctr4b, $rk12 @ AES block 4 - result 3906 3907 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 3908 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 3909 3910 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 5 - result 3911 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 3912 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 3913 3914 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 6 - result 3915 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 7 - result 3916 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 3917 3918 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 3919 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 3920 b.ge .L192_dec_prepretail @ do prepretail 3921 3922.L192_dec_main_loop: @ main loop start 3923 rev64 $res1b, $res1b @ GHASH block 8k+1 3924 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3925 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 3926 3927 rev64 $res0b, $res0b @ GHASH block 8k 3928 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 3929 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 3930 3931 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 3932 ext $h7.16b, $h7.16b, $h7.16b, #8 3933 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 3934 ext $h8.16b, $h8.16b, $h8.16b, #8 3935 rev64 $res4b, $res4b @ GHASH block 8k+4 3936 rev64 $res3b, $res3b @ GHASH block 8k+3 3937 3938 eor $res0b, $res0b, $acc_lb @ PRE 1 3939 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 3940 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 
3941 3942 rev64 $res5b, $res5b @ GHASH block 8k+5 3943 3944 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 3945 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 3946 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 3947 3948 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 3949 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 3950 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 3951 3952 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 3953 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 3954 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 3955 3956 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 3957 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 3958 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3959 3960 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 3961 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 3962 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 3963 ext $h5.16b, $h5.16b, $h5.16b, #8 3964 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 3965 ext $h6.16b, $h6.16b, $h6.16b, #8 3966 3967 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 3968 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 3969 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 3970 3971 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 3972 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 3973 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 3974 3975 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3976 rev64 $res2b, $res2b @ GHASH block 8k+2 3977 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 3978 3979 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 3980 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 
3981 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 3982 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3983 3984 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 3985 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 3986 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 3987 3988 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 3989 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 3990 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 3991 3992 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 3993 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 3994 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 3995 3996 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 3997 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 3998 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 3999 4000 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 4001 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 4002 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 4003 4004 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4005 ext $h3.16b, $h3.16b, $h3.16b, #8 4006 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4007 ext $h4.16b, $h4.16b, $h4.16b, #8 4008 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 4009 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 4010 4011 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 4012 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4013 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4014 4015 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 4016 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 4017 4018 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 4019 aese $ctr7b, 
$rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 4020 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4021 4022 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4023 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 4024 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 4025 4026 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4027 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 4028 4029 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 4030 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 4031 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 4032 4033 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 4034 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 4035 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 4036 4037 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 4038 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 4039 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 4040 4041 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 4042 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 4043 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 4044 4045 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 4046 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 4047 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 4048 4049 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4050 ext $h1.16b, $h1.16b, $h1.16b, #8 4051 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 4052 ext $h2.16b, $h2.16b, $h2.16b, #8 4053 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 4054 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 4055 4056 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 4057 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES 
block 8k+15 - round 5 4058 rev64 $res7b, $res7b @ GHASH block 8k+7 4059 4060 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 4061 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4062 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 4063 4064 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 4065 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4066 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 4067 4068 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 4069 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 4070 rev64 $res6b, $res6b @ GHASH block 8k+6 4071 4072 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4073 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4074 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 4075 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 4076 4077 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 4078 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4079 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4080 4081 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 4082 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 4083 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 4084 4085 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 4086 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 4087 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 4088 4089 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 4090 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 4091 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 4092 4093 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 4094 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 4095 eor3 
$acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 4096 4097 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 4098 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4099 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 4100 4101 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 4102 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 4103 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 4104 4105 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4106 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 4107 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 4108 4109 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 4110 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 4111 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 4112 4113 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4114 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 4115 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 4116 4117 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 4118 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 4119 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 4120 4121 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 4122 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 4123 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 4124 4125 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 4126 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 4127 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 4128 4129 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 4130 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 4131 aese $ctr6b, $rk8 \n aesmc $ctr6b, 
$ctr6b @ AES block 8k+14 - round 8 4132 4133 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 4134 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 4135 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 4136 4137 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 4138 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4139 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 4140 4141 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 4142 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 4143 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 4144 4145 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4146 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 4147 4148 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 4149 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 4150 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext 4151 4152 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 4153 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4154 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 4155 4156 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 4157 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 4158 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4159 4160 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 4161 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 4162 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext 4163 4164 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 4165 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 4166 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 4167 4168 aese $ctr0b, $rk10 \n aesmc 
$ctr0b, $ctr0b @ AES block 8k+8 - round 10 4169 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 4170 ldr $rk12q, [$cc, #192] @ load rk12 4171 4172 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 4173 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 4174 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 4175 4176 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 4177 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4178 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 4179 4180 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 4181 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 4182 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 4183 4184 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 8k+8 - result 4185 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 4186 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 4187 4188 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 4189 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 4190 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 4191 4192 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 4193 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 4194 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4195 4196 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 8k+9 - result 4197 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 4198 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 8k+11 - result 4199 4200 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 8k+10 - result 4201 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 8k+15 - result 4202 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 4203 4204 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 8k+13 - result 4205 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 4206 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 4207 4208 eor3 
$ctr4b, $res4b, $ctr4b, $rk12 @ AES block 8k+12 - result 4209 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 4210 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 4211 4212 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 8k+14 - result 4213 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 4214 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 4215 4216 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 4217 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 4218 4219 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 4220 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 4221 b.lt .L192_dec_main_loop 4222 4223.L192_dec_prepretail: @ PREPRETAIL 4224 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 4225 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 4226 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 4227 4228 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 4229 ext $h7.16b, $h7.16b, $h7.16b, #8 4230 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 4231 ext $h8.16b, $h8.16b, $h8.16b, #8 4232 rev64 $res0b, $res0b @ GHASH block 8k 4233 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 4234 4235 rev64 $res3b, $res3b @ GHASH block 8k+3 4236 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 4237 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 4238 4239 eor $res0b, $res0b, $acc_lb @ PRE 1 4240 rev64 $res2b, $res2b @ GHASH block 8k+2 4241 rev64 $res1b, $res1b @ GHASH block 8k+1 4242 4243 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 4244 ext $h5.16b, $h5.16b, $h5.16b, #8 4245 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 4246 ext $h6.16b, $h6.16b, $h6.16b, #8 4247 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 4248 4249 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 4250 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 4251 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 4252 4253 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - 
round 0 4254 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 4255 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 4256 4257 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 4258 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 4259 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 4260 4261 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 4262 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 4263 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 4264 4265 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 4266 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 4267 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 4268 4269 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 4270 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 4271 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 4272 4273 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 4274 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 4275 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 4276 4277 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 4278 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 4279 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 4280 4281 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 4282 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 4283 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 4284 4285 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 4286 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 4287 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 4288 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 4289 4290 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 4291 rev64 $res5b, $res5b @ GHASH 
block 8k+5 4292 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 4293 4294 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 4295 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 4296 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 4297 4298 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4299 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 4300 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 4301 4302 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 4303 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 4304 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4305 4306 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 4307 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 4308 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 4309 4310 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 4311 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4312 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 4313 4314 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 4315 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 4316 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 4317 4318 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 4319 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4320 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 4321 4322 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4323 ext $h3.16b, $h3.16b, $h3.16b, #8 4324 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4325 ext $h4.16b, $h4.16b, $h4.16b, #8 4326 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 4327 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 4328 4329 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4330 ext $h1.16b, 
$h1.16b, $h1.16b, #8 4331 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 4332 ext $h2.16b, $h2.16b, $h2.16b, #8 4333 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 4334 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 4335 4336 rev64 $res7b, $res7b @ GHASH block 8k+7 4337 4338 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4339 rev64 $res4b, $res4b @ GHASH block 8k+4 4340 4341 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 4342 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 4343 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 4344 4345 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 4346 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 4347 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 4348 4349 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 4350 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 4351 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 4352 4353 rev64 $res6b, $res6b @ GHASH block 8k+6 4354 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4355 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4356 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4357 4358 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 4359 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 4360 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 4361 4362 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 4363 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 4364 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 4365 4366 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 4367 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 4368 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 4369 4370 aese $ctr4b, $rk5 \n 
aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 4371 4372 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 4373 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4374 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 4375 4376 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 4377 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4378 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 4379 4380 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4381 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 4382 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4383 4384 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 4385 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 4386 4387 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4388 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 4389 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 4390 4391 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 4392 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 4393 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 4394 4395 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 4396 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 4397 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 4398 4399 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4400 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 4401 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 4402 4403 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 4404 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 4405 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 4406 4407 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 4408 
pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 4409 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 4410 4411 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 4412 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 4413 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 4414 4415 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 4416 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 4417 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 4418 4419 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 4420 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 4421 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4422 4423 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 4424 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 4425 4426 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4427 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4428 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 4429 4430 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 4431 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 4432 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 4433 4434 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 4435 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4436 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 4437 4438 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 4439 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 4440 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 4441 4442 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 4443 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ 
AES block 8k+15 - round 9 4444 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 4445 4446 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 4447 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 4448 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 4449 4450 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 4451 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 4452 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 4453 4454 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4455 ldr $rk12q, [$cc, #192] @ load rk12 4456 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4457 4458 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 4459 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 4460 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 4461 4462 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 4463 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 4464 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 4465 4466 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 4467 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 4468 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 4469 4470 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 4471 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 4472 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 4473 4474 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 4475 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 4476 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 4477 4478 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 4479 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 4480 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 4481 4482.L192_dec_tail: @ TAIL 
4483 4484 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 4485 4486 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 4487 ext $h5.16b, $h5.16b, $h5.16b, #8 4488 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 4489 4490 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 4491 ext $h8.16b, $h8.16b, $h8.16b, #8 4492 4493 mov $t1.16b, $rk12 4494 4495 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 4496 ext $h6.16b, $h6.16b, $h6.16b, #8 4497 ext $h7.16b, $h7.16b, $h7.16b, #8 4498 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 4499 4500 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 4501 cmp $main_end_input_ptr, #112 4502 b.gt .L192_dec_blocks_more_than_7 4503 4504 mov $ctr7b, $ctr6b 4505 movi $acc_h.8b, #0 4506 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4507 4508 mov $ctr6b, $ctr5b 4509 mov $ctr5b, $ctr4b 4510 mov $ctr4b, $ctr3b 4511 4512 cmp $main_end_input_ptr, #96 4513 movi $acc_l.8b, #0 4514 mov $ctr3b, $ctr2b 4515 4516 mov $ctr2b, $ctr1b 4517 movi $acc_m.8b, #0 4518 b.gt .L192_dec_blocks_more_than_6 4519 4520 mov $ctr7b, $ctr6b 4521 mov $ctr6b, $ctr5b 4522 mov $ctr5b, $ctr4b 4523 4524 mov $ctr4b, $ctr3b 4525 mov $ctr3b, $ctr1b 4526 4527 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4528 cmp $main_end_input_ptr, #80 4529 b.gt .L192_dec_blocks_more_than_5 4530 4531 mov $ctr7b, $ctr6b 4532 mov $ctr6b, $ctr5b 4533 4534 mov $ctr5b, $ctr4b 4535 mov $ctr4b, $ctr1b 4536 cmp $main_end_input_ptr, #64 4537 4538 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4539 b.gt .L192_dec_blocks_more_than_4 4540 4541 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4542 mov $ctr7b, $ctr6b 4543 mov $ctr6b, $ctr5b 4544 4545 mov $ctr5b, $ctr1b 4546 cmp $main_end_input_ptr, #48 4547 b.gt .L192_dec_blocks_more_than_3 4548 4549 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4550 mov $ctr7b, $ctr6b 4551 cmp $main_end_input_ptr, #32 4552 4553 mov $ctr6b, $ctr1b 4554 ldr $h34kq, 
[$current_tag, #96] @ load h4k | h3k 4555 b.gt .L192_dec_blocks_more_than_2 4556 4557 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4558 4559 mov $ctr7b, $ctr1b 4560 cmp $main_end_input_ptr, #16 4561 b.gt .L192_dec_blocks_more_than_1 4562 4563 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4564 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4565 b .L192_dec_blocks_less_than_1 4566.L192_dec_blocks_more_than_7: @ blocks left > 7 4567 rev64 $res0b, $res1b @ GHASH final-7 block 4568 4569 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 4570 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4571 4572 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 4573 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 4574 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 4575 4576 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 4577 4578 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 4579 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 4580 4581 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 4582 4583 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 4584 movi $t0.8b, #0 @ suppress further partial tag feed in 4585.L192_dec_blocks_more_than_6: @ blocks left > 6 4586 4587 rev64 $res0b, $res1b @ GHASH final-6 block 4588 4589 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4590 4591 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 4592 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 4593 4594 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 4595 movi $t0.8b, #0 @ suppress further partial tag feed in 4596 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 4597 4598 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 4599 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 4600 4601 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 4602 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH 
final-6 block - mid 4603 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 4604 4605 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 4606 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 4607.L192_dec_blocks_more_than_5: @ blocks left > 5 4608 4609 rev64 $res0b, $res1b @ GHASH final-5 block 4610 4611 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4612 4613 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 4614 4615 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 4616 4617 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 4618 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 4619 4620 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 4621 4622 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 4623 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 4624 4625 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 4626 4627 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 4628 movi $t0.8b, #0 @ suppress further partial tag feed in 4629 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 4630 4631 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 4632 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 4633.L192_dec_blocks_more_than_4: @ blocks left > 4 4634 4635 rev64 $res0b, $res1b @ GHASH final-4 block 4636 4637 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4638 movi $t0.8b, #0 @ suppress further partial tag feed in 4639 4640 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 4641 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 4642 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 4643 4644 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 4645 4646 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 4647 4648 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 4649 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 4650 pmull2 $rk2q1, 
$res0.2d, $h5.2d @ GHASH final-4 block - high 4651 4652 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 4653 4654 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 4655 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 4656.L192_dec_blocks_more_than_3: @ blocks left > 3 4657 4658 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4659 ext $h4.16b, $h4.16b, $h4.16b, #8 4660 rev64 $res0b, $res1b @ GHASH final-3 block 4661 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 4662 4663 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4664 4665 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 4666 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 4667 4668 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 4669 movi $t0.8b, #0 @ suppress further partial tag feed in 4670 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 4671 4672 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 4673 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 4674 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 4675 4676 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 4677 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4678 4679 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 4680 4681 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 4682 4683 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 4684.L192_dec_blocks_more_than_2: @ blocks left > 2 4685 4686 rev64 $res0b, $res1b @ GHASH final-2 block 4687 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4688 ext $h3.16b, $h3.16b, $h3.16b, #8 4689 4690 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4691 4692 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 4693 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 4694 4695 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 4696 4697 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 4698 4699 eor 
$acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 4700 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 4701 4702 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 4703 movi $t0.8b, #0 @ suppress further partial tag feed in 4704 4705 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 4706 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 4707 4708 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 4709 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 4710.L192_dec_blocks_more_than_1: @ blocks left > 1 4711 4712 rev64 $res0b, $res1b @ GHASH final-1 block 4713 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 4714 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 4715 ext $h2.16b, $h2.16b, $h2.16b, #8 4716 4717 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4718 movi $t0.8b, #0 @ suppress further partial tag feed in 4719 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4720 4721 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 4722 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 4723 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 4724 4725 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 4726 4727 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 4728 4729 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 4730 4731 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 4732 4733 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 4734 4735 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 4736 4737 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 4738 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 4739.L192_dec_blocks_less_than_1: @ blocks left <= 1 4740 4741 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 4742 and $bit_length, $bit_length, #127 @ bit_length %= 128 4743 4744 sub $bit_length, $bit_length, #128 @ bit_length -= 128 4745 str $rtmp_ctrq, [$counter] @ store the updated 
counter 4746 4747 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 4748 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 4749 4750 and $bit_length, $bit_length, #127 @ bit_length %= 128 4751 4752 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 4753 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 4754 cmp $bit_length, #64 4755 4756 csel $temp2_x, $temp1_x, $temp0_x, lt 4757 csel $temp3_x, $temp0_x, xzr, lt 4758 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4759 ext $h1.16b, $h1.16b, $h1.16b, #8 4760 4761 mov $ctr0.d[1], $temp3_x 4762 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 4763 4764 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 4765 4766 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 4767 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 4768 4769 rev64 $res0b, $res1b @ GHASH final block 4770 4771 st1 { $res4b}, [$output_ptr] @ store all 16B 4772 4773 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4774 4775 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 4776 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 4777 4778 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 4779 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 4780 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 4781 4782 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 4783 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 4784 4785 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4786 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 4787 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 4788 4789 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4790 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4791 4792 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up 

	eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid

	pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low
	ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment

	eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low
	ext $acc_lb, $acc_lb, $acc_lb, #8
	rev64 $acc_lb, $acc_lb
	st1 { $acc_l.16b }, [$current_tag]

	mov x0, $byte_length

	ldp d10, d11, [sp, #16]
	ldp d12, d13, [sp, #32]
	ldp d14, d15, [sp, #48]
	ldp d8, d9, [sp], #80
	ret

.L192_dec_ret:
	mov w0, #0x0
	ret
.size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel
___
}

{

# Register aliases for the assembly emitted from this scope.  Every name is
# just a string holding a register spelling that gets interpolated into the
# heredoc below, so several names intentionally resolve to the SAME physical
# register.  The notes on each group list the overlaps.

# Scalar scratch: x4-x7 and x13-x14.
my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
my ($temp2_x,$temp3_x)=map("x$_",(13..14));

# v0-v7  = $ctr0..$ctr7: the eight counter blocks that are run through AES.
# v8-v15 = $res0..$res7: the eight result blocks.
# Each register is named with a .16b suffix (*b), bare (no suffix), and as
# d/q views, because different instructions need different operand spellings.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));

# $ctr_t0..$ctr_t7 are second names for v8-v15, i.e. the same registers as
# $res0..$res7: input blocks are loaded under the $ctr_t* names and the
# result is written back under the $res* names, overwriting the input.
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));

# GHASH accumulators: high / mid / low parts in v17 / v18 / v19.
my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));

# Hash-key table registers v20-v25.  The two halves of the table share them:
#   v20 = $h1 / $h5      v21 = $h12k / $h56k   v22 = $h2 / $h6
#   v23 = $h3 / $h7      v24 = $h34k / $h78k   v25 = $h4 / $h8
# Only one name per register is valid at a time; the assembly reloads the
# value it needs from [$current_tag + offset] before using it.
my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));

# Temporaries.  Only v16 and v29 are dedicated; the rest borrow other names:
#   $t0 = v16 (also $t6 and $mod_constant below)
#   $t1 = v29 (also $t3 and $t12)
my $t0="v16";
my $t0d="d16";

my $t1="v29";
my $t2=$res1;		# v9
my $t3=$t1;		# v29

my $t4=$res0;		# v8
my $t5=$res2;		# v10
my $t6=$t0;		# v16

my $t7=$res3;		# v11
my $t8=$res4;		# v12
my $t9=$res5;		# v13

my $t10=$res6;		# v14
my $t11="v21";		# same register as $h12k / $h56k
my $t12=$t1;		# v29

# v30 carries the byte-reversed counter, v31 the per-block increment vector.
my $rtmp_ctr="v30";
my $rtmp_ctrq="q30";
my $rctr_inc="v31";
my $rctr_incd="d31";

# The GHASH reduction constant lives in $t0 (v16), so it must be reloaded
# after any use of $t0 / $t6 as a temporary.
my $mod_constantd=$t0d;
my $mod_constant=$t0;

# Fifteen round-key names rotate over three registers: $rkN is v(26 + N % 3).
#   v26 = rk0 rk3 rk6 rk9  rk12
#   v27 = rk1 rk4 rk7 rk10 rk13
#   v28 = rk2 rk5 rk8 rk11 rk14
# Round keys are therefore reloaded from [$cc + offset] in small groups just
# before the rounds that consume them.
my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
# Extra spellings of the round-key registers: v28 with a .1q arrangement,
# v26 with a .1q arrangement, and bare v27.
# NOTE(review): presumably these let the tail code reuse the round-key
# registers as GHASH scratch once the keys are no longer needed -- confirm
# against the tail of the kernels emitted from this scope.
my $rk2q1="v28.1q";
my $rk3q1="v26.1q";
my $rk4v="v27";
#########################################################################################
# size_t unroll8_eor3_aes_gcm_enc_256_kernel(const uint8_t * plaintext,
#                                            uint64_t plaintext_length,
#                                            uint8_t * ciphertext,
#                                            uint64_t *Xi,
#                                            unsigned char ivec[16],
#                                            const void *key);
#
# NOTE(review): the kernel computes its byte count as $bit_length >> 3, so
# the length argument looks like a count of BITS despite being named
# plaintext_length here -- confirm against the C caller.
#
$code.=<<___;
.global unroll8_eor3_aes_gcm_enc_256_kernel
.type unroll8_eor3_aes_gcm_enc_256_kernel,%function
.align 4
unroll8_eor3_aes_gcm_enc_256_kernel:
	AARCH64_VALID_CALL_TARGET
	cbz x1, .L256_enc_ret
	stp d8, d9, [sp, #-80]!
4897 lsr $byte_length, $bit_length, #3 4898 mov $counter, x4 4899 mov $cc, x5 4900 stp d10, d11, [sp, #16] 4901 stp d12, d13, [sp, #32] 4902 stp d14, d15, [sp, #48] 4903 mov x5, #0xc200000000000000 4904 stp x5, xzr, [sp, #64] 4905 add $modulo_constant, sp, #64 4906 4907 ld1 { $ctr0b}, [$counter] @ CTR block 0 4908 4909 mov $main_end_input_ptr, $byte_length 4910 4911 mov $constant_temp, #0x100000000 @ set up counter increment 4912 movi $rctr_inc.16b, #0x0 4913 mov $rctr_inc.d[1], $constant_temp 4914 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 4915 4916 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4917 4918 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 4919 4920 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 4921 4922 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 4923 4924 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 4925 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 4926 4927 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 4928 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 4929 4930 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 4931 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 4932 4933 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 4934 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 4935 4936 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 4937 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 4938 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 4939 4940 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 4941 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 4942 4943 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 4944 4945 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 4946 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 4947 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 4948 4949 aese $ctr0b, $rk0 
\n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 4950 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 4951 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 4952 4953 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 4954 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 4955 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 4956 4957 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 4958 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 4959 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 4960 4961 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 4962 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 4963 4964 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 4965 4966 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 4967 4968 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 4969 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 4970 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 4971 4972 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 4973 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 4974 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 4975 4976 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 4977 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 4978 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 4979 4980 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 4981 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 4982 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4983 4984 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 4985 4986 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 4987 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 4988 aese $ctr7b, 
$rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 4989 4990 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 4991 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 4992 4993 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 4994 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 4995 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 4996 4997 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 4998 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 4999 5000 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 5001 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 5002 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 5003 5004 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 5005 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 5006 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 5007 5008 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 5009 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 5010 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 5011 5012 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 5013 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 5014 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 5015 5016 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 5017 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 5018 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 5019 5020 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 5021 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 5022 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 5023 5024 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 5025 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 
5026 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 5027 5028 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 5029 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 5030 5031 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 5032 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 5033 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 5034 5035 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 5036 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 5037 5038 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 5039 5040 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 5041 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 5042 5043 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 5044 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 5045 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 5046 5047 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 5048 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 5049 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 5050 5051 ld1 { $acc_lb}, [$current_tag] 5052 ext $acc_lb, $acc_lb, $acc_lb, #8 5053 rev64 $acc_lb, $acc_lb 5054 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5055 5056 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 5057 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 5058 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 5059 5060 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 5061 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 5062 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 5063 5064 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 5065 5066 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 5067 aese $ctr4b, $rk10 
\n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10 5068 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 5069 5070 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 5071 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 5072 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 5073 5074 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 5075 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 5076 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 5077 5078 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11 5079 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5080 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11 5081 5082 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 5083 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11 5084 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 5085 5086 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 5087 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 5088 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11 5089 5090 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 5091 ldr $rk14q, [$cc, #224] @ load rk14 5092 5093 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12 5094 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 5095 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 5096 5097 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12 5098 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12 5099 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 5100 5101 aese $ctr2b, $rk13 @ AES block 2 - round 13 5102 aese $ctr1b, $rk13 @ AES block 1 - round 13 5103 aese $ctr4b, $rk13 @ AES block 4 - round 13 5104 5105 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 
- round 12 5106 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12 5107 5108 aese $ctr0b, $rk13 @ AES block 0 - round 13 5109 aese $ctr5b, $rk13 @ AES block 5 - round 13 5110 5111 aese $ctr6b, $rk13 @ AES block 6 - round 13 5112 aese $ctr7b, $rk13 @ AES block 7 - round 13 5113 aese $ctr3b, $rk13 @ AES block 3 - round 13 5114 5115 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 5116 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 5117 b.ge .L256_enc_tail @ handle tail 5118 5119 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 5120 5121 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext 5122 5123 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 0 - result 5124 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 5125 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 5126 5127 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 1 - result 5128 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 3 - result 5129 5130 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 5131 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 5132 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 5133 5134 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 5135 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 2 - result 5136 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 5137 5138 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 5139 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 5140 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 5141 5142 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 5143 5144 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 5145 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 5146 5147 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result 5148 5149 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result 5150 eor3 $res6b, 
$ctr_t6b, $ctr6b, $rk14 @ AES block 6 - result 5151 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result 5152 5153 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 5154 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 5155 5156 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 5157 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 5158 b.ge .L256_enc_prepretail @ do prepretail 5159 5160.L256_enc_main_loop: @ main loop start 5161 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 5162 5163 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 5164 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 5165 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 5166 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 5167 5168 rev64 $res3b, $res3b @ GHASH block 8k+3 5169 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 5170 ext $h5.16b, $h5.16b, $h5.16b, #8 5171 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 5172 ext $h6.16b, $h6.16b, $h6.16b, #8 5173 rev64 $res1b, $res1b @ GHASH block 8k+1 5174 5175 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 5176 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 5177 rev64 $res0b, $res0b @ GHASH block 8k 5178 5179 rev64 $res4b, $res4b @ GHASH block 8k+4 5180 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5181 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 5182 ext $h7.16b, $h7.16b, $h7.16b, #8 5183 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 5184 ext $h8.16b, $h8.16b, $h8.16b, #8 5185 5186 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 5187 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 5188 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 5189 5190 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 5191 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 5192 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 5193 5194 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES 
block 8k+15 - round 0 5195 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 5196 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 5197 5198 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 5199 eor $res0b, $res0b, $acc_lb @ PRE 1 5200 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 5201 5202 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 5203 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 5204 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 5205 5206 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 5207 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 5208 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 5209 5210 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 5211 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 5212 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 5213 5214 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5215 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5216 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 5217 5218 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 5219 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 5220 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 5221 5222 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 5223 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 5224 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 5225 5226 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 5227 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 5228 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 5229 5230 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 5231 aese $ctr7b, $rk2 \n aesmc 
$ctr7b, $ctr7b @ AES block 8k+15 - round 2 5232 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 5233 5234 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 5235 rev64 $res6b, $res6b @ GHASH block 8k+6 5236 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 5237 5238 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 5239 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 5240 rev64 $res2b, $res2b @ GHASH block 8k+2 5241 5242 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 5243 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 5244 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 5245 5246 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 5247 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 5248 rev64 $res5b, $res5b @ GHASH block 8k+5 5249 5250 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 5251 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 5252 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5253 ext $h3.16b, $h3.16b, $h3.16b, #8 5254 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5255 ext $h4.16b, $h4.16b, $h4.16b, #8 5256 5257 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5258 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 5259 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 5260 5261 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 5262 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 5263 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 5264 5265 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 5266 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 5267 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 5268 5269 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5270 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 
8k+14 - round 4 5271 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 5272 5273 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5274 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 5275 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 5276 5277 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 5278 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 5279 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 5280 5281 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5282 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 5283 rev64 $res7b, $res7b @ GHASH block 8k+7 5284 5285 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 5286 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 5287 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 5288 5289 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 5290 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 5291 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 5292 5293 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 5294 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 5295 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 5296 5297 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 5298 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 5299 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 5300 5301 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 5302 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 5303 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 5304 5305 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 5306 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 5307 aese $ctr0b, $rk6 \n aesmc $ctr0b, 
$ctr0b @ AES block 8k+8 - round 6 5308 5309 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 5310 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 5311 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 5312 5313 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 5314 ext $h1.16b, $h1.16b, $h1.16b, #8 5315 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 5316 ext $h2.16b, $h2.16b, $h2.16b, #8 5317 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 5318 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5319 5320 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5321 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5322 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 5323 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 5324 5325 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 5326 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 5327 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 5328 5329 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5330 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 5331 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 5332 5333 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 5334 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 5335 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 5336 5337 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 5338 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5339 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5340 5341 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 5342 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 5343 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 5344 5345 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 5346 pmull 
$h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 5347 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 5348 5349 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 5350 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 5351 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 5352 5353 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 5354 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5355 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 5356 5357 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5358 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 5359 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 5360 5361 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5362 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 5363 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 5364 5365 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5366 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 5367 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 5368 5369 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 5370 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 5371 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 5372 5373 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 5374 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 5375 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 5376 5377 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 5378 5379 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5380 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 5381 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 5382 5383 aese $ctr4b, $rk10 \n aesmc 
$ctr4b, $ctr4b @ AES block 8k+12 - round 10 5384 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 5385 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 5386 5387 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 5388 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 5389 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 5390 5391 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 5392 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 5393 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 5394 5395 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 5396 5397 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5398 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 5399 5400 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5401 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 5402 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 5403 5404 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 5405 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 5406 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 5407 5408 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 5409 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 5410 5411 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5412 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 5413 5414 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 5415 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 5416 5417 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 5418 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 5419 rev32 $h2.16b, $rtmp_ctr.16b @ CTR 
block 8k+17 5420 5421 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 5422 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 5423 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 5424 5425 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 5426 ldr $rk14q, [$cc, #224] @ load rk14 5427 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 5428 5429 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 5430 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 5431 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 5432 5433 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 5434 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 5435 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 5436 5437 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 5438 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 5439 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 5440 5441 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 5442 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 5443 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 5444 5445 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 5446 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 5447 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 5448 5449 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 8k+10 - result 5450 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 5451 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 5452 5453 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 5454 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 5455 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result 5456 5457 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5458 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5459 aese 
$ctr1b, $rk13 @ AES block 8k+9 - round 13 5460 5461 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result 5462 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 5463 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 8k+11 - result 5464 5465 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 5466 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 8k+9 - result 5467 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 8k+8 - result 5468 5469 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 5470 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 5471 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 5472 5473 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result 5474 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 5475 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 5476 5477 eor3 $res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block 6 - result 5478 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 5479 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 5480 5481 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 5482 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 5483 b.lt .L256_enc_main_loop 5484 5485.L256_enc_prepretail: @ PREPRETAIL 5486 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 5487 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 5488 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 5489 5490 rev64 $res2b, $res2b @ GHASH block 8k+2 5491 5492 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 5493 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 5494 5495 rev64 $res5b, $res5b @ GHASH block 8k+5 5496 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 5497 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 5498 5499 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 5500 5501 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 5502 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 5503 aese $ctr1b, $rk0 \n aesmc 
$ctr1b, $ctr1b @ AES block 8k+9 - round 0 5504 5505 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 5506 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 5507 5508 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 5509 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 5510 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 5511 5512 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5513 rev64 $res0b, $res0b @ GHASH block 8k 5514 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 5515 5516 rev64 $res1b, $res1b @ GHASH block 8k+1 5517 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 5518 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 5519 5520 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 5521 ext $h7.16b, $h7.16b, $h7.16b, #8 5522 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 5523 ext $h8.16b, $h8.16b, $h8.16b, #8 5524 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 5525 5526 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 5527 ext $h5.16b, $h5.16b, $h5.16b, #8 5528 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 5529 ext $h6.16b, $h6.16b, $h6.16b, #8 5530 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 5531 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 5532 5533 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 5534 eor $res0b, $res0b, $acc_lb @ PRE 1 5535 5536 rev64 $res3b, $res3b @ GHASH block 8k+3 5537 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 5538 5539 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 5540 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 5541 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 5542 5543 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 5544 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 
2 5545 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 5546 5547 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 5548 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 5549 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 5550 5551 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 5552 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5553 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 5554 5555 rev64 $res6b, $res6b @ GHASH block 8k+6 5556 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 5557 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 5558 5559 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 5560 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 5561 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5562 5563 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 5564 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 5565 5566 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 5567 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 5568 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 5569 5570 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 5571 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 5572 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 5573 5574 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 5575 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 5576 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 5577 5578 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 5579 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 5580 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 5581 5582 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 5583 aese $ctr2b, $rk4 \n 
aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 5584 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 5585 5586 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 5587 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 5588 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 5589 5590 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 5591 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5592 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5593 5594 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 5595 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 5596 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 5597 5598 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 5599 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 5600 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5601 5602 rev64 $res4b, $res4b @ GHASH block 8k+4 5603 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 5604 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 5605 5606 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 5607 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 5608 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 5609 5610 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5611 ext $h3.16b, $h3.16b, $h3.16b, #8 5612 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5613 ext $h4.16b, $h4.16b, $h4.16b, #8 5614 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 5615 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 5616 5617 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 5618 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 5619 5620 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 5621 rev64 $res7b, $res7b @ GHASH block 8k+7 5622 trn1 $t6.2d, $res5.2d, 
$res4.2d @ GHASH block 8k+4, 8k+5 - mid 5623 5624 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 5625 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 5626 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5627 5628 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 5629 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 5630 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 5631 5632 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5633 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5634 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 5635 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 5636 5637 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 5638 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 5639 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 5640 5641 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 5642 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 5643 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 5644 ext $h1.16b, $h1.16b, $h1.16b, #8 5645 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 5646 ext $h2.16b, $h2.16b, $h2.16b, #8 5647 5648 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 5649 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 5650 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 5651 5652 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 5653 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5654 5655 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 5656 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 5657 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 5658 5659 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 5660 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES 
block 8k+11 - round 7 5661 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5662 5663 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 5664 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 5665 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 5666 5667 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5668 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5669 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 5670 5671 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 5672 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 5673 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 5674 5675 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 5676 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 5677 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 5678 5679 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 5680 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5681 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 5682 5683 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 5684 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 5685 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 5686 5687 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 5688 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 5689 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 5690 5691 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 5692 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5693 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 5694 5695 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5696 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 5697 aese $ctr0b, $rk9 \n aesmc $ctr0b, 
$ctr0b @ AES block 8k+8 - round 9 5698 5699 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 5700 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5701 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 5702 5703 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 5704 5705 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 5706 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 5707 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 5708 5709 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 5710 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 5711 5712 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 5713 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 5714 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 5715 5716 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 5717 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 5718 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 5719 5720 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 5721 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 5722 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 5723 5724 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5725 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 5726 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 5727 5728 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5729 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5730 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 5731 5732 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 5733 aese $ctr1b, $rk11 \n aesmc 
$ctr1b, $ctr1b @ AES block 8k+9 - round 11 5734 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 5735 5736 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 5737 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 5738 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 5739 5740 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5741 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 5742 ldr $rk14q, [$cc, #224] @ load rk14 5743 5744 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 5745 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 5746 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 5747 5748 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 5749 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 5750 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5751 5752 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 5753 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 5754 5755 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 5756 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 5757 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 5758 5759 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 5760 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 5761 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 5762 5763 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 5764 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 5765 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 5766 5767 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 5768 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 5769.L256_enc_tail: @ TAIL 5770 5771 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8l | h8h 5772 ext $h8.16b, $h8.16b, $h8.16b, #8 5773 sub 
$main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 5774 5775 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext 5776 5777 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 5778 ext $h5.16b, $h5.16b, $h5.16b, #8 5779 5780 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 5781 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 5782 ext $h6.16b, $h6.16b, $h6.16b, #8 5783 ext $h7.16b, $h7.16b, $h7.16b, #8 5784 mov $t1.16b, $rk14 5785 5786 cmp $main_end_input_ptr, #112 5787 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result 5788 b.gt .L256_enc_blocks_more_than_7 5789 5790 movi $acc_l.8b, #0 5791 mov $ctr7b, $ctr6b 5792 movi $acc_h.8b, #0 5793 5794 mov $ctr6b, $ctr5b 5795 mov $ctr5b, $ctr4b 5796 mov $ctr4b, $ctr3b 5797 5798 mov $ctr3b, $ctr2b 5799 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5800 mov $ctr2b, $ctr1b 5801 5802 movi $acc_m.8b, #0 5803 cmp $main_end_input_ptr, #96 5804 b.gt .L256_enc_blocks_more_than_6 5805 5806 mov $ctr7b, $ctr6b 5807 mov $ctr6b, $ctr5b 5808 cmp $main_end_input_ptr, #80 5809 5810 mov $ctr5b, $ctr4b 5811 mov $ctr4b, $ctr3b 5812 mov $ctr3b, $ctr1b 5813 5814 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5815 b.gt .L256_enc_blocks_more_than_5 5816 5817 mov $ctr7b, $ctr6b 5818 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5819 5820 mov $ctr6b, $ctr5b 5821 mov $ctr5b, $ctr4b 5822 5823 cmp $main_end_input_ptr, #64 5824 mov $ctr4b, $ctr1b 5825 b.gt .L256_enc_blocks_more_than_4 5826 5827 cmp $main_end_input_ptr, #48 5828 mov $ctr7b, $ctr6b 5829 mov $ctr6b, $ctr5b 5830 5831 mov $ctr5b, $ctr1b 5832 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5833 b.gt .L256_enc_blocks_more_than_3 5834 5835 cmp $main_end_input_ptr, #32 5836 mov $ctr7b, $ctr6b 5837 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5838 5839 mov $ctr6b, $ctr1b 5840 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5841 b.gt .L256_enc_blocks_more_than_2 5842 5843 mov $ctr7b, $ctr1b 5844 5845 
sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5846 cmp $main_end_input_ptr, #16 5847 b.gt .L256_enc_blocks_more_than_1 5848 5849 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5850 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5851 b .L256_enc_blocks_less_than_1 5852.L256_enc_blocks_more_than_7: @ blocks left > 7 5853 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 5854 5855 rev64 $res0b, $res1b @ GHASH final-7 block 5856 5857 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5858 5859 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 5860 5861 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 5862 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 5863 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 5864 5865 movi $t0.8b, #0 @ suppress further partial tag feed in 5866 5867 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 5868 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 5869 5870 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 5871 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 5872.L256_enc_blocks_more_than_6: @ blocks left > 6 5873 5874 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 5875 5876 rev64 $res0b, $res1b @ GHASH final-6 block 5877 5878 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5879 5880 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 5881 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 5882 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 5883 5884 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 5885 5886 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 5887 5888 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 5889 5890 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 5891 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 5892 5893 movi $t0.8b, #0 @ suppress further partial tag feed in 5894 
5895 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 5896 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 5897.L256_enc_blocks_more_than_5: @ blocks left > 5 5898 5899 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result 5900 5901 rev64 $res0b, $res1b @ GHASH final-5 block 5902 5903 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5904 5905 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 5906 5907 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 5908 5909 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 5910 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 5911 5912 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 5913 5914 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 5915 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 5916 5917 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 5918 movi $t0.8b, #0 @ suppress further partial tag feed in 5919 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 5920 5921 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 5922 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 5923.L256_enc_blocks_more_than_4: @ blocks left > 4 5924 5925 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 5926 5927 rev64 $res0b, $res1b @ GHASH final-4 block 5928 5929 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 5930 5931 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5932 5933 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 5934 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 5935 5936 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 5937 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 5938 5939 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 5940 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 5941 5942 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 5943 5944 movi 
$t0.8b, #0 @ suppress further partial tag feed in 5945 5946 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 5947 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 5948.L256_enc_blocks_more_than_3: @ blocks left > 3 5949 5950 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 5951 5952 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5953 ext $h4.16b, $h4.16b, $h4.16b, #8 5954 rev64 $res0b, $res1b @ GHASH final-3 block 5955 5956 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5957 5958 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 5959 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 5960 5961 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 5962 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 5963 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5964 5965 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 5966 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 5967 5968 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 5969 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 5970 5971 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 5972 movi $t0.8b, #0 @ suppress further partial tag feed in 5973 5974 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 5975 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 5976.L256_enc_blocks_more_than_2: @ blocks left > 2 5977 5978 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5979 ext $h3.16b, $h3.16b, $h3.16b, #8 5980 5981 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 5982 5983 rev64 $res0b, $res1b @ GHASH final-2 block 5984 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 5985 5986 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5987 5988 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 5989 5990 movi $t0.8b, #0 @ suppress further partial tag feed in 5991 5992 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 5993 
eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result 5994 5995 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 5996 5997 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 5998 5999 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 6000 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 6001 6002 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 6003 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 6004.L256_enc_blocks_more_than_1: @ blocks left > 1 6005 6006 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 6007 6008 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6009 ext $h2.16b, $h2.16b, $h2.16b, #8 6010 rev64 $res0b, $res1b @ GHASH final-1 block 6011 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 6012 6013 eor $res0b, $res0b, $t0.16b @ feed in partial tag 6014 movi $t0.8b, #0 @ suppress further partial tag feed in 6015 6016 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 6017 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 6018 6019 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 6020 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 6021 6022 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 6023 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 6024 6025 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6026 6027 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 6028 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 6029 6030 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 6031 6032 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 6033.L256_enc_blocks_less_than_1: @ blocks left <= 1 6034 6035 and $bit_length, $bit_length, #127 @ bit_length %= 128 6036 6037 sub $bit_length, $bit_length, #128 @ bit_length -= 128 6038 6039 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 6040 6041 mvn $temp0_x, xzr @ temp0_x = 
0xffffffffffffffff 6042 and $bit_length, $bit_length, #127 @ bit_length %= 128 6043 6044 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 6045 cmp $bit_length, #64 6046 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 6047 6048 csel $temp3_x, $temp0_x, xzr, lt 6049 csel $temp2_x, $temp1_x, $temp0_x, lt 6050 6051 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 6052 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6053 ext $h1.16b, $h1.16b, $h1.16b, #8 6054 6055 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 6056 mov $ctr0.d[1], $temp3_x 6057 6058 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 6059 6060 rev64 $res0b, $res1b @ GHASH final block 6061 6062 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 6063 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 6064 str $rtmp_ctrq, [$counter] @ store the updated counter 6065 6066 eor $res0b, $res0b, $t0.16b @ feed in partial tag 6067 st1 { $res1b}, [$output_ptr] @ store all 16B 6068 6069 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 6070 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 6071 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 6072 6073 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 6074 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 6075 6076 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 6077 6078 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 6079 6080 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 6081 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6082 6083 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6084 6085 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6086 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6087 6088 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6089 6090 pmull 
$acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 6091 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6092 6093 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 6094 ext $acc_lb, $acc_lb, $acc_lb, #8 6095 rev64 $acc_lb, $acc_lb 6096 st1 { $acc_l.16b }, [$current_tag] 6097 mov x0, $byte_length @ return sizes 6098 6099 ldp d10, d11, [sp, #16] 6100 ldp d12, d13, [sp, #32] 6101 ldp d14, d15, [sp, #48] 6102 ldp d8, d9, [sp], #80 6103 ret 6104 6105.L256_enc_ret: 6106 mov w0, #0x0 6107 ret 6108.size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel 6109___ 6110 6111{ 6112######################################################################################### 6113# size_t unroll8_eor3_aes_gcm_dec_256_kernel(const uint8_t * ciphertext, 6114# uint64_t plaintext_length, (length in bits; the kernel shifts it right by 3 to get the byte count) 6115# uint8_t * plaintext, 6116# uint64_t *Xi, 6117# unsigned char ivec[16], 6118# const void *key); 6119# 6120$code.=<<___; 6121.global unroll8_eor3_aes_gcm_dec_256_kernel 6122.type unroll8_eor3_aes_gcm_dec_256_kernel,%function 6123.align 4 6124unroll8_eor3_aes_gcm_dec_256_kernel: 6125 AARCH64_VALID_CALL_TARGET 6126 cbz x1, .L256_dec_ret 6127 stp d8, d9, [sp, #-80]! 
6128 lsr $byte_length, $bit_length, #3 6129 mov $counter, x4 6130 mov $cc, x5 6131 stp d10, d11, [sp, #16] 6132 stp d12, d13, [sp, #32] 6133 stp d14, d15, [sp, #48] 6134 mov x5, #0xc200000000000000 6135 stp x5, xzr, [sp, #64] 6136 add $modulo_constant, sp, #64 6137 6138 ld1 { $ctr0b}, [$counter] @ CTR block 0 6139 6140 mov $constant_temp, #0x100000000 @ set up counter increment 6141 movi $rctr_inc.16b, #0x0 6142 mov $rctr_inc.d[1], $constant_temp 6143 mov $main_end_input_ptr, $byte_length 6144 6145 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 6146 6147 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 6148 6149 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 6150 6151 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 6152 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 6153 6154 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 6155 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 6156 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6157 6158 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 6159 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 6160 6161 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 6162 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 6163 6164 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 6165 6166 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 6167 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 6168 6169 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 6170 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 6171 6172 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 6173 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 6174 6175 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 6176 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 6177 6178 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 6179 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 6180 6181 aese $ctr3b, $rk0 \n aesmc 
$ctr3b, $ctr3b @ AES block 3 - round 0 6182 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 6183 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6184 6185 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 6186 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 6187 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 6188 6189 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 6190 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 6191 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 6192 6193 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 6194 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 6195 6196 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 6197 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 6198 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 6199 6200 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 6201 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 6202 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 6203 6204 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 6205 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 6206 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6207 6208 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 6209 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 6210 6211 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 6212 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 6213 6214 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 6215 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 6216 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 6217 6218 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 6219 6220 aese $ctr7b, $rk4 \n 
aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 6221 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 6222 6223 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 6224 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 6225 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 6226 6227 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 6228 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 6229 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 6230 6231 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 6232 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 6233 6234 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6235 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 6236 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 6237 6238 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 6239 6240 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 6241 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 6242 6243 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 6244 6245 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 6246 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 6247 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 6248 6249 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 6250 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 6251 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 6252 6253 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 6254 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 6255 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6256 6257 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 6258 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 6259 6260 aese $ctr3b, 
$rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 6261 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 6262 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 6263 6264 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 6265 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 6266 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 6267 6268 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 6269 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 6270 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 6271 6272 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 6273 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 6274 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 6275 6276 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 6277 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 6278 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 6279 6280 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 6281 6282 ld1 { $acc_lb}, [$current_tag] 6283 ext $acc_lb, $acc_lb, $acc_lb, #8 6284 rev64 $acc_lb, $acc_lb 6285 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6286 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 6287 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 6288 6289 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 6290 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 6291 6292 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 6293 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 6294 6295 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 6296 6297 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 6298 aese $ctr1b, $rk9 
\n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 6299 6300 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10 6301 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 6302 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 6303 6304 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 6305 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 6306 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 6307 6308 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 6309 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 6310 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6311 6312 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 6313 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 6314 6315 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11 6316 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 6317 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 6318 6319 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11 6320 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11 6321 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 6322 6323 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11 6324 ldr $rk14q, [$cc, #224] @ load rk14 6325 6326 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 6327 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12 6328 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12 6329 6330 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 6331 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 6332 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 6333 6334 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 12 6335 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - 
round 12 6336 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12 6337 6338 aese $ctr5b, $rk13 @ AES block 5 - round 13 6339 aese $ctr1b, $rk13 @ AES block 1 - round 13 6340 aese $ctr2b, $rk13 @ AES block 2 - round 13 6341 6342 aese $ctr0b, $rk13 @ AES block 0 - round 13 6343 aese $ctr4b, $rk13 @ AES block 4 - round 13 6344 aese $ctr6b, $rk13 @ AES block 6 - round 13 6345 6346 aese $ctr3b, $rk13 @ AES block 3 - round 13 6347 aese $ctr7b, $rk13 @ AES block 7 - round 13 6348 b.ge .L256_dec_tail @ handle tail 6349 6350 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 6351 6352 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 6353 6354 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 6355 6356 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 6357 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 6358 6359 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 1 - result 6360 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 0 - result 6361 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result 6362 6363 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 6364 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 6365 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 3 - result 6366 6367 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 5 - result 6368 6369 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 4 - result 6370 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 6371 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 6372 6373 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 2 - result 6374 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 6375 6376 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 6377 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 6378 6379 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 6 - result 6380 6381 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 6382 add $rtmp_ctr.4s, $rtmp_ctr.4s, 
$rctr_inc.4s @ CTR block 11 6383 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 6384 6385 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 7 - result 6386 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 6387 6388 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 6389 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 6390 b.ge .L256_dec_prepretail @ do prepretail 6391 6392.L256_dec_main_loop: @ main loop start 6393 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 6394 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6395 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 6396 6397 rev64 $res1b, $res1b @ GHASH block 8k+1 6398 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 6399 ext $h7.16b, $h7.16b, $h7.16b, #8 6400 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 6401 ext $h8.16b, $h8.16b, $h8.16b, #8 6402 6403 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 6404 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 6405 rev64 $res0b, $res0b @ GHASH block 8k 6406 6407 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 6408 rev64 $res4b, $res4b @ GHASH block 8k+4 6409 rev64 $res3b, $res3b @ GHASH block 8k+3 6410 6411 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 6412 rev64 $res7b, $res7b @ GHASH block 8k+7 6413 6414 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 6415 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 6416 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 6417 6418 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 6419 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 6420 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 6421 6422 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 6423 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 6424 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6425 6426 eor $res0b, $res0b, $acc_lb @ PRE 1 6427 
ldr $h5q, [$current_tag, #128] @ load h5l | h5h 6428 ext $h5.16b, $h5.16b, $h5.16b, #8 6429 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 6430 ext $h6.16b, $h6.16b, $h6.16b, #8 6431 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 6432 6433 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 6434 rev64 $res2b, $res2b @ GHASH block 8k+2 6435 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 6436 6437 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 6438 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 6439 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 6440 6441 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6442 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 6443 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 6444 6445 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 6446 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 6447 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 6448 6449 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 6450 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 6451 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 6452 6453 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 6454 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 6455 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 6456 6457 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6458 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 6459 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 6460 6461 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 6462 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 6463 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 6464 6465 aese $ctr5b, 
$rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 6466 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 6467 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 6468 6469 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 6470 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 6471 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6472 6473 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 6474 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 6475 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 6476 6477 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 6478 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 6479 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 6480 6481 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 6482 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 6483 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 6484 6485 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 6486 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 6487 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 6488 6489 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 6490 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 6491 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 6492 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 6493 6494 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6495 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 6496 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 6497 6498 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 6499 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 6500 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 6501 6502 aese $ctr1b, 
$rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 6503 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 6504 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 6505 6506 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 6507 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6508 rev64 $res5b, $res5b @ GHASH block 8k+5 6509 6510 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 6511 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 6512 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6513 6514 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 6515 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 6516 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 6517 6518 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6519 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 6520 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 6521 6522 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6523 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 6524 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 6525 6526 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 6527 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 6528 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 6529 6530 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 6531 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 6532 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 6533 6534 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 6535 ext $h3.16b, $h3.16b, $h3.16b, #8 6536 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 6537 ext $h4.16b, $h4.16b, $h4.16b, #8 6538 rev64 $res6b, $res6b @ GHASH block 8k+6 6539 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - 
mid 6540 6541 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 6542 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 6543 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6544 6545 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6546 ext $h1.16b, $h1.16b, $h1.16b, #8 6547 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6548 ext $h2.16b, $h2.16b, $h2.16b, #8 6549 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6550 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 6551 6552 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 6553 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 6554 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 6555 6556 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6557 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 6558 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 6559 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 6560 6561 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 6562 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 6563 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6564 6565 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 6566 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 6567 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 6568 6569 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 6570 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 6571 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 6572 6573 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 6574 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 6575 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 6576 6577 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6578 aese $ctr3b, $rk8 \n aesmc 
$ctr3b, $ctr3b @ AES block 8k+11 - round 8 6579 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 6580 6581 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6582 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 6583 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6584 6585 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 6586 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 6587 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 6588 6589 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 6590 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6591 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 6592 6593 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 6594 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6595 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 6596 6597 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 6598 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 6599 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 6600 6601 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 6602 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 6603 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 6604 6605 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 6606 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 6607 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 6608 6609 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 6610 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 6611 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 6612 6613 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 6614 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 
- mid 6615 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 6616 6617 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 6618 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 6619 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 6620 6621 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 6622 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 6623 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 6624 6625 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 6626 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 6627 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6628 6629 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 6630 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 6631 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6632 6633 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 6634 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 6635 6636 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6637 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 6638 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 6639 6640 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext 6641 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 6642 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6643 6644 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 6645 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 6646 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 6647 6648 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 6649 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 6650 aese $ctr6b, $rk12 \n aesmc 
$ctr6b, $ctr6b @ AES block 8k+14 - round 12 6651 6652 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 6653 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 6654 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6655 6656 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6657 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 6658 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 6659 6660 ldr $rk14q, [$cc, #224] @ load rk14 6661 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 6662 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 6663 6664 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6665 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 6666 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 6667 6668 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext 6669 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 6670 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 6671 6672 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 6673 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 6674 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 6675 6676 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 6677 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 8k+10 - result 6678 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 8k+9 - result 6679 6680 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6681 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 6682 6683 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 6684 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 6685 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 6686 6687 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 8k+13 - result 6688 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 8k+8 - result 6689 aese $ctr3b, $rk13 @ 
AES block 8k+11 - round 13 6690 6691 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 6692 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 6693 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 8k+12 - result 6694 6695 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 6696 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 8k+11 - result 6697 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 6698 6699 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 6700 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 6701 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 6702 6703 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 6704 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 6705 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 8k+15 - result 6706 6707 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 8k+14 - result 6708 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 6709 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 6710 6711 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 6712 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 6713 b.lt .L256_dec_main_loop 6714 6715.L256_dec_prepretail: @ PREPRETAIL 6716 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6717 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 6718 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 6719 6720 rev64 $res4b, $res4b @ GHASH block 8k+4 6721 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 6722 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 6723 6724 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 6725 rev64 $res0b, $res0b @ GHASH block 8k 6726 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 6727 6728 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 6729 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 6730 ext $h7.16b, $h7.16b, $h7.16b, #8 6731 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 6732 ext $h8.16b, $h8.16b, $h8.16b, #8 6733 rev64 $res1b, $res1b @ GHASH block 
8k+1 6734 6735 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 6736 rev64 $res2b, $res2b @ GHASH block 8k+2 6737 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 6738 ext $h5.16b, $h5.16b, $h5.16b, #8 6739 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 6740 ext $h6.16b, $h6.16b, $h6.16b, #8 6741 6742 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 6743 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 6744 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 6745 6746 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 6747 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 6748 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 6749 6750 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 6751 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 6752 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 6753 6754 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6755 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 6756 eor $res0b, $res0b, $acc_lb @ PRE 1 6757 6758 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 6759 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 6760 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 6761 6762 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 6763 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 6764 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 6765 6766 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 6767 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6768 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 6769 6770 rev64 $res3b, $res3b @ GHASH block 8k+3 6771 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 6772 6773 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 
6774 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 6775 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 6776 6777 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 6778 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 6779 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 6780 6781 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 6782 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 6783 6784 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 6785 rev64 $res6b, $res6b @ GHASH block 8k+6 6786 6787 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 6788 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 6789 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 6790 6791 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 6792 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6793 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 6794 6795 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6796 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 6797 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 6798 6799 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 6800 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 6801 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 6802 6803 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 6804 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 6805 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 6806 6807 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 6808 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6809 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6810 6811 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 
6812 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 6813 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 6814 6815 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 6816 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 6817 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 6818 6819 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 6820 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6821 ext $h1.16b, $h1.16b, $h1.16b, #8 6822 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6823 ext $h2.16b, $h2.16b, $h2.16b, #8 6824 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 6825 6826 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 6827 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 6828 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 6829 6830 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6831 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 6832 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 6833 6834 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 6835 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 6836 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 6837 6838 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 6839 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 6840 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 6841 6842 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 6843 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 6844 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 6845 6846 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 6847 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 6848 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6849 
6850 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 6851 ext $h3.16b, $h3.16b, $h3.16b, #8 6852 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 6853 ext $h4.16b, $h4.16b, $h4.16b, #8 6854 rev64 $res7b, $res7b @ GHASH block 8k+7 6855 rev64 $res5b, $res5b @ GHASH block 8k+5 6856 6857 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6858 6859 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6860 6861 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 6862 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6863 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 6864 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 6865 6866 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 6867 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 6868 6869 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 6870 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 6871 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 6872 6873 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6874 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 6875 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6876 6877 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 6878 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 6879 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 6880 6881 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 6882 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 6883 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 6884 6885 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6886 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 6887 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 6888 6889 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 6890 aese $ctr4b, $rk7 \n aesmc $ctr4b, 
$ctr4b @ AES block 8k+12 - round 7 6891 6892 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 6893 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 6894 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 6895 6896 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 6897 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6898 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 6899 6900 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 6901 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 6902 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 6903 6904 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 6905 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 6906 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 6907 6908 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 6909 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 6910 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6911 6912 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 6913 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 6914 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6915 6916 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 6917 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 6918 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 6919 6920 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 6921 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 6922 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 6923 6924 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 6925 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 6926 pmull $h1.1q, $res7.1d, $h1.1d @ 
GHASH block 8k+7 - low 6927 6928 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6929 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 6930 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6931 6932 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 6933 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 6934 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 6935 6936 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 6937 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 6938 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6939 6940 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6941 6942 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 6943 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 6944 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 6945 6946 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 6947 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 6948 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 6949 6950 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6951 6952 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 6953 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 6954 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6955 6956 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6957 6958 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 6959 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 6960 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 6961 6962 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6963 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES 
block 8k+11 - round 11 6964 6965 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 6966 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 6967 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 6968 6969 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 6970 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 6971 6972 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6973 6974 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 6975 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 6976 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 6977 6978 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 6979 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 6980 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 6981 6982 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 6983 ldr $rk14q, [$cc, #224] @ load rk14 6984 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 6985 6986 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 6987 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6988 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 6989 6990 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 6991 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 6992 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 6993 6994 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 6995 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 6996 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 6997 6998 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 6999 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 7000.L256_dec_tail: @ TAIL 7001 7002 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 7003 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ 
main_end_input_ptr is number of bytes left to process 7004 cmp $main_end_input_ptr, #112 7005 7006 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 7007 7008 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 7009 ext $h8.16b, $h8.16b, $h8.16b, #8 7010 mov $t1.16b, $rk14 7011 7012 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 7013 ext $h5.16b, $h5.16b, $h5.16b, #8 7014 7015 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 7016 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 7017 ext $h6.16b, $h6.16b, $h6.16b, #8 7018 ext $h7.16b, $h7.16b, $h7.16b, #8 7019 b.gt .L256_dec_blocks_more_than_7 7020 7021 mov $ctr7b, $ctr6b 7022 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7023 mov $ctr6b, $ctr5b 7024 7025 mov $ctr5b, $ctr4b 7026 mov $ctr4b, $ctr3b 7027 movi $acc_l.8b, #0 7028 7029 movi $acc_h.8b, #0 7030 movi $acc_m.8b, #0 7031 mov $ctr3b, $ctr2b 7032 7033 cmp $main_end_input_ptr, #96 7034 mov $ctr2b, $ctr1b 7035 b.gt .L256_dec_blocks_more_than_6 7036 7037 mov $ctr7b, $ctr6b 7038 mov $ctr6b, $ctr5b 7039 7040 mov $ctr5b, $ctr4b 7041 cmp $main_end_input_ptr, #80 7042 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7043 7044 mov $ctr4b, $ctr3b 7045 mov $ctr3b, $ctr1b 7046 b.gt .L256_dec_blocks_more_than_5 7047 7048 cmp $main_end_input_ptr, #64 7049 mov $ctr7b, $ctr6b 7050 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7051 7052 mov $ctr6b, $ctr5b 7053 7054 mov $ctr5b, $ctr4b 7055 mov $ctr4b, $ctr1b 7056 b.gt .L256_dec_blocks_more_than_4 7057 7058 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7059 mov $ctr7b, $ctr6b 7060 cmp $main_end_input_ptr, #48 7061 7062 mov $ctr6b, $ctr5b 7063 mov $ctr5b, $ctr1b 7064 b.gt .L256_dec_blocks_more_than_3 7065 7066 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 7067 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7068 mov $ctr7b, $ctr6b 7069 7070 cmp $main_end_input_ptr, #32 7071 mov $ctr6b, $ctr1b 7072 b.gt .L256_dec_blocks_more_than_2 7073 7074 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7075 
7076 mov $ctr7b, $ctr1b 7077 cmp $main_end_input_ptr, #16 7078 b.gt .L256_dec_blocks_more_than_1 7079 7080 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7081 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 7082 b .L256_dec_blocks_less_than_1 7083.L256_dec_blocks_more_than_7: @ blocks left > 7 7084 rev64 $res0b, $res1b @ GHASH final-7 block 7085 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 7086 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 7087 7088 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 7089 7090 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7091 7092 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 7093 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 7094 7095 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 7096 7097 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 7098 movi $t0.8b, #0 @ suppress further partial tag feed in 7099 7100 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 7101 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 7102.L256_dec_blocks_more_than_6: @ blocks left > 6 7103 7104 rev64 $res0b, $res1b @ GHASH final-6 block 7105 7106 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7107 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 7108 movi $t0.8b, #0 @ suppress further partial tag feed in 7109 7110 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 7111 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 7112 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 7113 7114 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 7115 7116 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 7117 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 7118 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 7119 7120 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 7121 7122 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH 
final-6 block - mid 7123 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 7124.L256_dec_blocks_more_than_5: @ blocks left > 5 7125 7126 rev64 $res0b, $res1b @ GHASH final-5 block 7127 7128 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7129 7130 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 7131 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 7132 7133 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 7134 7135 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 7136 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 7137 7138 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 7139 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 7140 7141 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 7142 7143 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 7144 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 7145 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 7146 7147 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 7148 movi $t0.8b, #0 @ suppress further partial tag feed in 7149.L256_dec_blocks_more_than_4: @ blocks left > 4 7150 7151 rev64 $res0b, $res1b @ GHASH final-4 block 7152 7153 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7154 7155 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 7156 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 7157 7158 movi $t0.8b, #0 @ suppress further partial tag feed in 7159 7160 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 7161 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 7162 7163 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 7164 7165 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 7166 7167 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 7168 7169 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 7170 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 7171 7172 eor 
$acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 7173 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 7174.L256_dec_blocks_more_than_3: @ blocks left > 3 7175 7176 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 7177 ext $h4.16b, $h4.16b, $h4.16b, #8 7178 rev64 $res0b, $res1b @ GHASH final-3 block 7179 7180 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7181 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 7182 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 7183 7184 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 7185 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 7186 7187 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 7188 7189 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 7190 7191 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 7192 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 7193 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 7194 7195 movi $t0.8b, #0 @ suppress further partial tag feed in 7196 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 7197 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 7198 7199 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 7200 7201 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 7202.L256_dec_blocks_more_than_2: @ blocks left > 2 7203 7204 rev64 $res0b, $res1b @ GHASH final-2 block 7205 7206 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 7207 ext $h3.16b, $h3.16b, $h3.16b, #8 7208 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 7209 7210 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7211 7212 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 7213 7214 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 7215 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 7216 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 7217 7218 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block 
- mid 7219 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 7220 movi $t0.8b, #0 @ suppress further partial tag feed in 7221 7222 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 7223 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 7224 7225 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 7226 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 7227.L256_dec_blocks_more_than_1: @ blocks left > 1 7228 7229 rev64 $res0b, $res1b @ GHASH final-1 block 7230 7231 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7232 7233 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 7234 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 7235 ext $h2.16b, $h2.16b, $h2.16b, #8 7236 7237 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 7238 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 7239 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 7240 7241 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 7242 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 7243 7244 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 7245 7246 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 7247 7248 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 7249 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 7250 7251 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 7252 7253 movi $t0.8b, #0 @ suppress further partial tag feed in 7254 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 7255 7256 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 7257.L256_dec_blocks_less_than_1: @ blocks left <= 1 7258 7259 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 7260 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 7261 and $bit_length, $bit_length, #127 @ bit_length %= 128 7262 7263 sub $bit_length, $bit_length, #128 @ bit_length -= 128 7264 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 7265 str 
$rtmp_ctrq, [$counter] @ store the updated counter 7266 7267 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 7268 7269 and $bit_length, $bit_length, #127 @ bit_length %= 128 7270 7271 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 7272 cmp $bit_length, #64 7273 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 7274 7275 csel $temp3_x, $temp0_x, xzr, lt 7276 csel $temp2_x, $temp1_x, $temp0_x, lt 7277 7278 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 7279 mov $ctr0.d[1], $temp3_x 7280 7281 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 7282 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 7283 ext $h1.16b, $h1.16b, $h1.16b, #8 7284 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 7285 7286 rev64 $res0b, $res1b @ GHASH final block 7287 7288 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7289 7290 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 7291 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 7292 7293 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 7294 7295 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 7296 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 7297 7298 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 7299 7300 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 7301 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 7302 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 7303 7304 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 7305 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 7306 7307 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 7308 st1 { $res4b}, [$output_ptr] @ store all 16B 7309 7310 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up 7311 7312 eor $t11.16b, $acc_hb, $t11.16b @ MODULO - fold into mid 7313 eor $acc_mb, $acc_mb, $t11.16b @ MODULO - fold 
into mid

    pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low

    ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment
    eor $acc_lb, $acc_lb, $acc_hb @ MODULO - fold into low

    eor $acc_lb, $acc_lb, $acc_mb @ MODULO - fold into low
    ext $acc_lb, $acc_lb, $acc_lb, #8
    rev64 $acc_lb, $acc_lb
    st1 { $acc_l.16b }, [$current_tag]
    mov x0, $byte_length

    ldp d10, d11, [sp, #16]
    ldp d12, d13, [sp, #32]
    ldp d14, d15, [sp, #48]
    ldp d8, d9, [sp], #80
    ret

.L256_dec_ret:
    mov w0, #0x0
    ret
.size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel
___
}
}

# Trailer appended to the generated assembly: an identification string,
# an alignment directive and a closing "#endif" (the matching conditional
# is opened outside this part of the file).
$code.=<<___;
.asciz "AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian\@arm.com>"
.align 2
#endif
___

# Post-processing pass: walk the text accumulated in $code line by line,
# translate perlasm conventions into plain assembler syntax and print the
# result to STDOUT.
{
    # Base opcode word for each instruction that unsha3() can hand-encode.
    my %opcode = (
        "rax1" => 0xce608c00, "eor3" => 0xce000000,
        "bcax" => 0xce200000, "xar"  => 0xce800000 );

    # unsha3($mnemonic, $arg)
    #
    # Hand-encode one rax1/eor3/bcax/xar instruction as a raw ".inst" word.
    #
    #   $mnemonic - key into %opcode
    #   $arg      - operand text, e.g. "v0.16b, v1.16b, v2.16b, v3.16b"
    #
    # The number following the leading q/v of each operand is extracted.
    # The first is OR-ed into the opcode unshifted, the second shifted left
    # by 5, the third by 16.  The optional fourth operand (a q/v register
    # number or a '#'-prefixed expression made of digits and '-') is passed
    # through eval and shifted left by 10.  Operands that are absent
    # contribute 0.
    #
    # Returns the ".inst" line with the original instruction kept as a
    # trailing "//" comment, or an empty (false) string when $arg does not
    # look like an operand list.  Callers must check for the false return.
    sub unsha3 {
        my ($mnemonic, $arg) = @_;

        my ($op0, $op1, $op2, $op3) = $arg =~
            m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv#]([0-9\-]+))?)?/
            or return '';

        # Third and fourth operands are optional in the pattern above; make
        # the "missing means zero" rule explicit instead of shifting undef.
        $op2 = 0 unless defined $op2;
        my $extra = defined $op3 ? eval($op3) : 0;
        $extra = 0 unless defined $extra;   # expression did not evaluate

        return sprintf ".inst\t0x%08x\t//%s %s",
            $opcode{$mnemonic} | $op0 | ($op1 << 5) | ($op2 << 16) | ($extra << 10),
            $mnemonic, $arg;
    }

    # unvmov($arg)
    #
    # Translate an operand pair of the form "qN#lo|hi, qM#lo|hi" into an
    # "ins vX.d[i],vY.d[j]" instruction.  Register numbers below 8 are used
    # as-is, numbers 8 and above are offset by 8; "lo" selects d[0] and
    # "hi" selects d[1].  Returns an empty (false) string when $arg does
    # not match.  Not called by the loop below.
    sub unvmov {
        my $arg = shift;

        $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/
            or return '';

        return sprintf "ins v%d.d[%d],v%d.d[%d]",
            $1 < 8 ? $1 : $1 + 8, ($2 eq "lo") ? 0 : 1,
            $3 < 8 ? $3 : $3 + 8, ($4 eq "lo") ? 0 : 1;
    }

    foreach (split("\n", $code)) {
        s/@\s/\/\//;                    # old->new style commentary
        s/\`([^\`]*)\`/eval($1)/ge;     # expand `...` with Perl's eval

        # On ld1r lines rewrite the .16b arrangement to .2d.  Every other
        # line (and an ld1r line with nothing to rewrite) is offered to
        # unsha3() for hand-encoding.
        unless (m/\bld1r\b/ and s/\.16b/.2d/g) {
            s{\b(eor3|rax1|xar|bcax)(\s+)(v.*)}{
                my ($mnemonic, $gap, $operands) = ($1, $2, $3);
                # Keep the instruction untouched if it cannot be encoded,
                # rather than silently deleting it from the output.
                unsha3($mnemonic, $operands) || "$mnemonic$gap$operands";
            }ge;
        }
        print $_, "\n";
    }
}

close STDOUT or die "error closing STDOUT: $!"; # enforce flush