1#! /usr/bin/env perl 2# Copyright 2020-2022 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9# 10#======================================================================== 11# Written by Xiaokang Qian <xiaokang.qian@arm.com> for the OpenSSL project, 12# derived from https://github.com/ARM-software/AArch64cryptolib, original 13# author Samuel Lee <Samuel.Lee@arm.com>. The module is, however, dual 14# licensed under OpenSSL and SPDX BSD-3-Clause licenses depending on where you 15# obtain it. 16#======================================================================== 17# 18# Approach - We want to reload constants as we have plenty of spare ASIMD slots around crypto units for loading 19# Unroll x8 in main loop, main loop to act on 8 16B blocks per iteration, and then do modulo of the accumulated 20# intermediate hashes from the 8 blocks.
21# 22# ____________________________________________________ 23# | | 24# | PRE | 25# |____________________________________________________| 26# | | | | 27# | CTR block 8k+13| AES block 8k+8 | GHASH block 8k+0 | 28# |________________|________________|__________________| 29# | | | | 30# | CTR block 8k+14| AES block 8k+9 | GHASH block 8k+1 | 31# |________________|________________|__________________| 32# | | | | 33# | CTR block 8k+15| AES block 8k+10| GHASH block 8k+2 | 34# |________________|________________|__________________| 35# | | | | 36# | CTR block 8k+16| AES block 8k+11| GHASH block 8k+3 | 37# |________________|________________|__________________| 38# | | | | 39# | CTR block 8k+17| AES block 8k+12| GHASH block 8k+4 | 40# |________________|________________|__________________| 41# | | | | 42# | CTR block 8k+18| AES block 8k+13| GHASH block 8k+5 | 43# |________________|________________|__________________| 44# | | | | 45# | CTR block 8k+19| AES block 8k+14| GHASH block 8k+6 | 46# |________________|________________|__________________| 47# | | | | 48# | CTR block 8k+20| AES block 8k+15| GHASH block 8k+7 | 49# |________________|____(mostly)____|__________________| 50# | | 51# | MODULO | 52# |____________________________________________________| 53# 54# PRE: 55# Ensure previous generated intermediate hash is aligned and merged with result for GHASH 8k+0 56# EXT low_acc, low_acc, low_acc, #8 57# EOR res_curr (8k+0), res_curr (8k+0), low_acc 58# 59# CTR block: 60# Increment and byte reverse counter in scalar registers and transfer to SIMD registers 61# REV ctr32, rev_ctr32 62# ORR ctr64, constctr96_top32, ctr32, LSL #32 63# INS ctr_next.d[0], constctr96_bottom64 // Keeping this in scalar registers to free up space in SIMD RF 64# INS ctr_next.d[1], ctr64X 65# ADD rev_ctr32, #1 66# 67# AES block: 68# Do AES encryption/decryption on CTR block X and EOR it with input block X. Take a 256-bit key below for example.
69# Doing small trick here of loading input in scalar registers, EORing with last key and then transferring 70# Given we are very constrained in our ASIMD registers this is quite important 71# 72# Encrypt: 73# LDR input_low, [ input_ptr ], #8 74# LDR input_high, [ input_ptr ], #8 75# EOR input_low, k14_low 76# EOR input_high, k14_high 77# INS res_curr.d[0], input_low 78# INS res_curr.d[1], input_high 79# AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr 80# AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr 81# AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr 82# AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr 83# AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr 84# AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr 85# AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr 86# AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr 87# AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr 88# AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr 89# AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr 90# AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr 91# AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr 92# AESE ctr_curr, k13 93# EOR res_curr, res_curr, ctr_curr 94# ST1 { res_curr.16b }, [ output_ptr ], #16 95# 96# Decrypt: 97# AESE ctr_curr, k0; AESMC ctr_curr, ctr_curr 98# AESE ctr_curr, k1; AESMC ctr_curr, ctr_curr 99# AESE ctr_curr, k2; AESMC ctr_curr, ctr_curr 100# AESE ctr_curr, k3; AESMC ctr_curr, ctr_curr 101# AESE ctr_curr, k4; AESMC ctr_curr, ctr_curr 102# AESE ctr_curr, k5; AESMC ctr_curr, ctr_curr 103# AESE ctr_curr, k6; AESMC ctr_curr, ctr_curr 104# AESE ctr_curr, k7; AESMC ctr_curr, ctr_curr 105# AESE ctr_curr, k8; AESMC ctr_curr, ctr_curr 106# AESE ctr_curr, k9; AESMC ctr_curr, ctr_curr 107# AESE ctr_curr, k10; AESMC ctr_curr, ctr_curr 108# AESE ctr_curr, k11; AESMC ctr_curr, ctr_curr 109# AESE ctr_curr, k12; AESMC ctr_curr, ctr_curr 110# AESE ctr_curr, k13 111# LDR res_curr, [ input_ptr ], #16 112# EOR res_curr, res_curr, ctr_curr 113# MOV output_low, res_curr.d[0] 114# MOV output_high, res_curr.d[1] 115# EOR output_low, k14_low 116# EOR 
output_high, k14_high 117# STP output_low, output_high, [ output_ptr ], #16 118 119# GHASH block X: 120# Do 128b karatsuba polynomial multiplication on block 121# We only have 64b->128b polynomial multipliers, naively that means we need to do 4 64b multiplies to generate a 128b 122# 123# multiplication: 124# Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah,Bl) ^ Pmull(Al,Bh))<<64 125# 126# The idea behind Karatsuba multiplication is that we can do just 3 64b multiplies: 127# Pmull(A,B) == (Pmull(Ah,Bh)<<128 | Pmull(Al,Bl)) ^ (Pmull(Ah^Al,Bh^Bl) ^ Pmull(Ah,Bh) ^ Pmull(Al,Bl))<<64 128# 129# There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are 130# multiplying with "twisted" powers of H 131# 132# Note: We can PMULL directly into the acc_x in first GHASH of the loop 133# Note: For scheduling big cores we want to split the processing to happen over two loop iterations - otherwise the critical 134# path latency dominates the performance 135# 136# This has a knock on effect on register pressure, so we have to be a bit more clever with our temporary registers 137# than indicated here 138# REV64 res_curr, res_curr 139# INS t_m.d[0], res_curr.d[1] 140# EOR t_m.8B, t_m.8B, res_curr.8B 141# PMULL2 t_h, res_curr, HX 142# PMULL t_l, res_curr, HX 143# PMULL t_m, t_m, HX_k 144# EOR acc_h, acc_h, t_h 145# EOR acc_l, acc_l, t_l 146# EOR acc_m, acc_m, t_m 147# 148# MODULO: take the partial accumulators (~representing sum of 256b multiplication results), from GHASH and do modulo reduction on them 149# There is some complication here because the bit order of GHASH's PMULL is reversed compared to elsewhere, so we are doing modulo 150# with a reversed constant 151# EOR3 acc_m, acc_m, acc_l, acc_h // Finish off karatsuba processing 152# PMULL t_mod, acc_h, mod_constant 153# EXT acc_h, acc_h, acc_h, #8 154# EOR3 acc_m, acc_m, t_mod, acc_h 155# PMULL acc_h, acc_m, mod_constant 156# EXT acc_m, acc_m, acc_m, #8 157# 
#       EOR3     acc_l, acc_l, acc_m, acc_h

# $output is the last argument if it looks like a file (it has an extension).
# $flavour is the first argument if it doesn't look like a file.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Locate the arm-xlate.pl translator, either next to this script or in
# ../../perlasm relative to it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate ) or
die "can't locate arm-xlate.pl";

die "only for 64 bit" if $flavour !~ /64/;

# Everything printed to STDOUT from here on is piped through arm-xlate.pl.
# Die if the translator cannot be started instead of silently producing
# nothing, and quote the output path so it survives the shell when it
# contains spaces.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

$code=<<___;
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=8
___
$code.=".arch armv8.2-a+crypto\n.text\n";

# Scalar register aliases shared by the kernels.  x0-x3 are incoming
# arguments; the kernel prologue copies x4 into $counter and x5 into $cc.
$input_ptr="x0";  #argument block
$bit_length="x1";
$output_ptr="x2";
$current_tag="x3";
$counter="x16";
$constant_temp="x15";
$modulo_constant="x10";
$cc="x8";
{
my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7));
my ($temp2_x,$temp3_x)=map("x$_",(13..14));

# v0-v7 hold the counter/AES state blocks, v8-v15 the result blocks.  The
# b/d/q variants below are the same registers under different views.
my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15));
my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15));
my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7));
my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7));
my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15));

# $ctr_tN name the same v8-v15 registers as $resN.
my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15));
my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15));
my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15));

# GHASH accumulators: high, mid and low parts.
my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19));
my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19));

# Powers of H.  h1..h4 and h5..h8 deliberately map onto the same six
# registers (v20-v25), so only one group can be live at a time.
my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25));
my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25));
my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25));
my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25));

# Temporaries.  Most of them are aliases of registers named above
# (v16, v29, the $res registers and v21), not additional registers.
my $t0="v16";
my $t0d="d16";

my $t1="v29";
my $t2=$res1;
my $t3=$t1;

my $t4=$res0;
my $t5=$res2;
my $t6=$t0;

my $t7=$res3;
my $t8=$res4;
my $t9=$res5;

my $t10=$res6;
my $t11="v21";
my $t12=$t1;

# Counter in reversed byte order and the per-block counter increment.
my $rtmp_ctr="v30";
my $rtmp_ctrq="q30";
my $rctr_inc="v31";
my $rctr_incd="d31";

# The modulo constant shares v16 with $t0.
my $mod_constantd=$t0d;
my $mod_constant=$t0;

# Round keys: all fifteen names rotate through the same three registers
# (v26-v28), so keys are reloaded from memory as the rounds progress.
my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28));
my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28));
my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28));
my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28));
my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28));
my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28));
my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28));
my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28));
my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28));
my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28));
my $rk2q1="v28.1q";
my $rk3q1="v26.1q";
my $rk4v="v27";


#########################################################################################
# size_t unroll8_eor3_aes_gcm_enc_128_kernel(const unsigned char *in,
#                                            size_t len,
#                                            unsigned char *out,
#                                            u64 *Xi,
#                                            unsigned char ivec[16],
#                                            const void *key);
#
# The argument order above follows the register usage in the code: x3 is
# used as $current_tag (the running tag and the hash-key table are read from
# it), x4 is copied to $counter (the initial counter block is loaded from it)
# and x5 is copied to $cc (the round keys are loaded from it).  len is a
# length in bits; the code shifts it right by 3 to obtain the byte length.
#
$code.=<<___;
.global unroll8_eor3_aes_gcm_enc_128_kernel
.type   unroll8_eor3_aes_gcm_enc_128_kernel,%function
.align  4
unroll8_eor3_aes_gcm_enc_128_kernel:
	AARCH64_VALID_CALL_TARGET
	cbz	x1, .L128_enc_ret
	stp	d8, d9, [sp, #-80]!
266 mov $counter, x4 267 mov $cc, x5 268 stp d10, d11, [sp, #16] 269 stp d12, d13, [sp, #32] 270 stp d14, d15, [sp, #48] 271 mov x5, #0xc200000000000000 272 stp x5, xzr, [sp, #64] 273 add $modulo_constant, sp, #64 274 275 mov $constant_temp, #0x100000000 @ set up counter increment 276 movi $rctr_inc.16b, #0x0 277 mov $rctr_inc.d[1], $constant_temp 278 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 279 ld1 { $ctr0b}, [$counter] @ CTR block 0 280 281 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 282 283 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 284 285 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 286 287 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 288 289 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 290 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 291 292 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 293 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 294 295 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 296 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 297 298 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 299 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 300 301 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 302 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 303 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 304 305 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 306 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 307 308 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 309 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 310 311 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 312 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 313 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 314 315 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 316 aese $ctr1b, $rk0 \n aesmc 
$ctr1b, $ctr1b @ AES block 1 - round 0 317 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 318 319 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 320 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 321 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 322 323 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 324 325 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 326 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 327 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 328 329 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 330 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 331 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 332 333 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 334 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 335 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 336 337 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 338 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 339 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 340 341 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 342 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 343 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 344 345 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 346 347 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 348 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 349 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 350 351 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 352 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 353 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 354 355 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 
3 356 357 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 358 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 359 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 360 361 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 362 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 363 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 364 365 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 366 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 367 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 368 369 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 370 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 371 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 372 373 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 374 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 375 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 376 377 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 378 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 379 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 380 381 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 382 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 383 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 384 385 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 386 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 387 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 388 389 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 390 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 391 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 392 393 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 394 395 ld1 { $acc_lb}, [$current_tag] 
396 ext $acc_lb, $acc_lb, $acc_lb, #8 397 rev64 $acc_lb, $acc_lb 398 399 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 400 401 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 402 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 403 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 404 405 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 406 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 407 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 408 409 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 410 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 411 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 412 413 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 414 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 415 ldr $rk10q, [$cc, #160] @ load rk10 416 417 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 418 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 419 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 420 421 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 422 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 423 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 424 425 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 426 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 427 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 428 429 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 430 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 431 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 432 433 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 434 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 435 b.ge .L128_enc_tail @ handle tail 436 437 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 438 439 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 
@ AES block 2, 3 - load plaintext 440 441 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 442 443 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 444 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 445 446 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 0 - result 447 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 448 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 449 450 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 1 - result 451 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 452 453 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 454 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result 455 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 456 457 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 2 - result 458 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result 459 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result 460 461 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 462 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 463 464 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 3 - result 465 eor3 $res7b, $ctr_t7b, $ctr7b,$rk10 @ AES block 7 - result 466 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 467 468 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 469 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 470 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 471 472 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 473 474 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 475 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 476 b.ge .L128_enc_prepretail @ do prepretail 477 478.L128_enc_main_loop: @ main loop start 479 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 480 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 481 ext $h5.16b, $h5.16b, $h5.16b, #8 482 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 483 ext $h6.16b, $h6.16b, 
$h6.16b, #8 484 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 485 486 rev64 $res1b, $res1b @ GHASH block 8k+1 487 rev64 $res0b, $res0b @ GHASH block 8k 488 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 489 ext $h7.16b, $h7.16b, $h7.16b, #8 490 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 491 ext $h8.16b, $h8.16b, $h8.16b, #8 492 493 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 494 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 495 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 496 497 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 498 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 499 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 500 rev64 $res3b, $res3b @ GHASH block 8k+3 501 502 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 503 eor $res0b, $res0b, $acc_lb @ PRE 1 504 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 505 506 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 507 508 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 509 rev64 $res2b, $res2b @ GHASH block 8k+2 510 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 511 512 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 513 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 514 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 515 516 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 517 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 518 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 519 520 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 521 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 522 ext $h3.16b, $h3.16b, $h3.16b, #8 523 ldr $h4q, [$current_tag, #112] @ load h3l | h3h 524 ext $h4.16b, $h4.16b, $h4.16b, #8 525 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 526 527 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 528 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 529 eor 
$acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 530 531 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 532 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 533 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 534 535 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 536 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 537 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 538 539 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 540 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 541 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 542 543 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 544 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 545 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 546 547 eor3 $acc_hb, $acc_hb, $t1.16b,$t2.16b @ GHASH block 8k+2, 8k+3 - high 548 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 549 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 550 551 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 552 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 553 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 554 555 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 556 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 557 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 558 559 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 560 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 561 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 562 563 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 564 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 565 566 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 567 eor $acc_mb, $acc_mb, 
$h78k.16b @ GHASH block 8k+1 - mid 568 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 569 570 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 571 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 572 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 573 574 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 575 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 576 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 577 578 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 579 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 580 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 581 582 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 583 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 584 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 585 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 586 587 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 588 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 589 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 590 591 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 592 ext $h1.16b, $h1.16b, $h1.16b, #8 593 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 594 ext $h2.16b, $h2.16b, $h2.16b, #8 595 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 596 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 597 598 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 599 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 600 601 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 602 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 603 604 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 605 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 606 607 aese $ctr5b, 
$rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 608 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 609 610 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 611 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 612 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 613 614 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 615 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 616 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 617 618 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 619 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 620 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 621 622 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 623 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 624 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 625 626 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 627 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 628 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 629 630 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 631 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 632 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 633 634 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 635 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 636 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 637 638 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 639 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 640 641 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 642 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 643 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 644 645 aese $ctr3b, $rk5 \n aesmc 
$ctr3b, $ctr3b @ AES block 8k+11 - round 5 646 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 647 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 648 649 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 650 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 651 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 652 653 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 654 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 655 656 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 657 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 658 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 659 660 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 661 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 662 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 663 664 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 665 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 666 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 667 668 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 669 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 670 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 671 672 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 673 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 674 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 675 676 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 677 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 678 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 679 680 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 681 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES 
block 8k+14 - round 7 682 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 683 684 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 685 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 686 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 687 688 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 689 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 690 691 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 692 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext 693 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 694 695 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 696 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 697 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 698 699 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 700 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 701 ldr $rk10q, [$cc, #160] @ load rk10 702 703 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 704 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 705 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 706 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 707 708 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 709 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 710 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 711 712 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 713 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 714 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 715 716 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext 717 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 718 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 719 720 cmp $input_ptr, $main_end_input_ptr 
@ LOOP CONTROL 721 eor3 $res4b, $ctr_t4b, $ctr4b, $rk10 @ AES block 4 - result 722 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 723 724 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 725 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 726 727 eor3 $res2b, $ctr_t2b, $ctr2b, $rk10 @ AES block 8k+10 - result 728 729 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 730 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 731 732 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 733 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 734 735 eor3 $res7b, $ctr_t7b, $ctr7b, $rk10 @ AES block 7 - result 736 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 737 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 738 739 eor3 $res1b, $ctr_t1b, $ctr1b, $rk10 @ AES block 8k+9 - result 740 eor3 $res3b, $ctr_t3b, $ctr3b, $rk10 @ AES block 8k+11 - result 741 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 742 743 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 744 eor3 $res5b, $ctr_t5b, $ctr5b, $rk10 @ AES block 5 - result 745 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 746 747 eor3 $res0b, $ctr_t0b, $ctr0b, $rk10 @ AES block 8k+8 - result 748 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 749 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 750 751 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 752 eor3 $res6b, $ctr_t6b, $ctr6b, $rk10 @ AES block 6 - result 753 754 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 755 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 756 757 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 758 b.lt .L128_enc_main_loop 759 760.L128_enc_prepretail: @ PREPRETAIL 761 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 762 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 763 ext $h7.16b, $h7.16b, $h7.16b, #8 764 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 765 ext $h8.16b, $h8.16b, $h8.16b, #8 766 ext 
$acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 767 768 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 769 ext $h5.16b, $h5.16b, $h5.16b, #8 770 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 771 ext $h6.16b, $h6.16b, $h6.16b, #8 772 rev64 $res0b, $res0b @ GHASH block 8k 773 rev64 $res1b, $res1b @ GHASH block 8k+1 774 775 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 776 ldr $h78kq, [$current_tag, #192] @ load h6k | h5k 777 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 778 rev64 $res3b, $res3b @ GHASH block 8k+3 779 780 rev64 $res2b, $res2b @ GHASH block 8k+2 781 eor $res0b, $res0b, $acc_lb @ PRE 1 782 783 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 784 785 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 786 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 787 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 788 789 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 790 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 791 792 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 793 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 794 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 795 796 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 797 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 798 799 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 800 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 801 802 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 803 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 804 805 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 806 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 807 808 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 809 810 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 811 812 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 813 814 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 815 816 
pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 817 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 818 819 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 820 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 821 822 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 823 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 824 825 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 826 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 827 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 828 829 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 830 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 831 832 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 833 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 834 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 835 836 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 837 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 838 839 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 840 ext $h3.16b, $h3.16b, $h3.16b, #8 841 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 842 ext $h4.16b, $h4.16b, $h4.16b, #8 843 844 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 845 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 846 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 847 848 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 849 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 850 851 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 852 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 853 854 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 855 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 856 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 857 aese 
$ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 858 859 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 860 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 861 862 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 863 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 864 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 865 866 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 867 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 868 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 869 870 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 871 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 872 873 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 874 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 875 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 876 877 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 878 ext $h1.16b, $h1.16b, $h1.16b, #8 879 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 880 ext $h2.16b, $h2.16b, $h2.16b, #8 881 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 882 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 883 884 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 885 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 886 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 887 888 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 889 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 890 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 891 892 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 893 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 894 895 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 896 aese $ctr1b, $rk3 \n 
aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 897 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 898 899 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 900 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 901 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 902 903 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 904 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 905 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 906 907 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 908 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 909 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 910 911 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 912 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 913 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 914 915 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 916 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 917 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 918 919 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 920 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 921 922 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 923 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 924 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 925 926 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 927 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 928 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 929 930 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 931 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 932 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 933 934 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - 
round 5 935 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 936 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 937 938 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 939 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 940 941 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 942 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 943 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 944 945 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 946 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 947 948 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 949 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 950 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 951 952 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 953 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 954 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 955 956 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 957 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 958 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 959 960 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 961 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 962 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 963 964 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 965 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 966 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 967 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 968 969 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 970 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ 
AES block 8k+8 - round 7 971 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 972 973 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 974 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 975 976 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 977 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 978 979 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 980 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 981 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 982 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 983 984 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 985 eor3 $acc_lb, $acc_lb, $acc_hb, $acc_mb @ MODULO - fold into low 986 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 987 988 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 989 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 990 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 991 992 ldr $rk10q, [$cc, #160] @ load rk10 993 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 994 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 995 996 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 997 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 998 999 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 1000 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 1001 1002 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 1003 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 1004.L128_enc_tail: @ TAIL 1005 1006 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 1007 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext 1008 1009 mov $t1.16b, $rk10 1010 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 1011 ext $h5.16b, $h5.16b, $h5.16b, #8 1012 1013 eor3 $res1b, $ctr_t0b, $ctr0b, 
$t1.16b @ AES block 8k+8 - result 1014 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 1015 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 1016 ext $h6.16b, $h6.16b, $h6.16b, #8 1017 ext $h7.16b, $h7.16b, $h7.16b, #8 1018 1019 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 1020 ext $h8.16b, $h8.16b, $h8.16b, #8 1021 cmp $main_end_input_ptr, #112 1022 b.gt .L128_enc_blocks_more_than_7 1023 1024 mov $ctr7b, $ctr6b 1025 mov $ctr6b, $ctr5b 1026 movi $acc_h.8b, #0 1027 1028 cmp $main_end_input_ptr, #96 1029 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1030 mov $ctr5b, $ctr4b 1031 1032 mov $ctr4b, $ctr3b 1033 mov $ctr3b, $ctr2b 1034 mov $ctr2b, $ctr1b 1035 1036 movi $acc_l.8b, #0 1037 movi $acc_m.8b, #0 1038 b.gt .L128_enc_blocks_more_than_6 1039 1040 mov $ctr7b, $ctr6b 1041 cmp $main_end_input_ptr, #80 1042 1043 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1044 mov $ctr6b, $ctr5b 1045 mov $ctr5b, $ctr4b 1046 1047 mov $ctr4b, $ctr3b 1048 mov $ctr3b, $ctr1b 1049 b.gt .L128_enc_blocks_more_than_5 1050 1051 cmp $main_end_input_ptr, #64 1052 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1053 1054 mov $ctr7b, $ctr6b 1055 mov $ctr6b, $ctr5b 1056 1057 mov $ctr5b, $ctr4b 1058 mov $ctr4b, $ctr1b 1059 b.gt .L128_enc_blocks_more_than_4 1060 1061 mov $ctr7b, $ctr6b 1062 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1063 mov $ctr6b, $ctr5b 1064 1065 mov $ctr5b, $ctr1b 1066 cmp $main_end_input_ptr, #48 1067 b.gt .L128_enc_blocks_more_than_3 1068 1069 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1070 mov $ctr7b, $ctr6b 1071 mov $ctr6b, $ctr1b 1072 1073 cmp $main_end_input_ptr, #32 1074 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1075 b.gt .L128_enc_blocks_more_than_2 1076 1077 cmp $main_end_input_ptr, #16 1078 1079 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1080 mov $ctr7b, $ctr1b 1081 b.gt .L128_enc_blocks_more_than_1 1082 1083 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1084 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 1085 b 
.L128_enc_blocks_less_than_1 1086.L128_enc_blocks_more_than_7: @ blocks left > 7 1087 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 1088 1089 rev64 $res0b, $res1b @ GHASH final-7 block 1090 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 1091 1092 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1093 1094 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 1095 1096 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 1097 1098 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 1099 1100 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 1101 movi $t0.8b, #0 @ supress further partial tag feed in 1102 1103 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 1104 1105 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 1106 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 1107.L128_enc_blocks_more_than_6: @ blocks left > 6 1108 1109 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 1110 1111 rev64 $res0b, $res1b @ GHASH final-6 block 1112 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 1113 1114 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1115 1116 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 1117 1118 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 1119 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 1120 1121 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 1122 movi $t0.8b, #0 @ supress further partial tag feed in 1123 1124 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 1125 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 1126 1127 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 1128 1129 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 1130 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 1131.L128_enc_blocks_more_than_5: @ blocks left > 5 1132 1133 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 
block - store result 1134 1135 rev64 $res0b, $res1b @ GHASH final-5 block 1136 1137 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1138 1139 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 1140 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 1141 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 1142 1143 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 1144 1145 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 1146 1147 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 1148 1149 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 1150 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 1151 movi $t0.8b, #0 @ supress further partial tag feed in 1152 1153 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 1154 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 1155 1156 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 1157.L128_enc_blocks_more_than_4: @ blocks left > 4 1158 1159 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 1160 1161 rev64 $res0b, $res1b @ GHASH final-4 block 1162 1163 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 1164 1165 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1166 1167 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 1168 movi $t0.8b, #0 @ supress further partial tag feed in 1169 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 1170 1171 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 1172 1173 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 1174 1175 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 1176 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 1177 1178 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 1179 1180 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 1181 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 1182.L128_enc_blocks_more_than_3: @ blocks left > 3 1183 
1184 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 1185 1186 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1187 ext $h4.16b, $h4.16b, $h4.16b, #8 1188 1189 rev64 $res0b, $res1b @ GHASH final-3 block 1190 1191 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1192 movi $t0.8b, #0 @ supress further partial tag feed in 1193 1194 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 1195 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1196 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 1197 1198 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 1199 1200 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 1201 1202 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 1203 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 1204 1205 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 1206 1207 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 1208 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 1209 1210 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 1211 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 1212.L128_enc_blocks_more_than_2: @ blocks left > 2 1213 1214 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 1215 1216 rev64 $res0b, $res1b @ GHASH final-2 block 1217 1218 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1219 1220 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 1221 1222 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 1223 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1224 ext $h3.16b, $h3.16b, $h3.16b, #8 1225 movi $t0.8b, #0 @ supress further partial tag feed in 1226 1227 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 1228 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result 1229 1230 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 1231 1232 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 1233 pmull $rk4v.1q, 
$rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 1234 1235 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 1236 1237 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 1238 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 1239.L128_enc_blocks_more_than_1: @ blocks left > 1 1240 1241 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 1242 1243 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1244 ext $h2.16b, $h2.16b, $h2.16b, #8 1245 rev64 $res0b, $res1b @ GHASH final-1 block 1246 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 1247 1248 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1249 1250 movi $t0.8b, #0 @ supress further partial tag feed in 1251 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 1252 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 1253 1254 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 1255 1256 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 1257 1258 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1259 1260 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 1261 1262 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 1263 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 1264 1265 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 1266 1267 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 1268 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 1269.L128_enc_blocks_less_than_1: @ blocks left <= 1 1270 1271 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 1272 str $rtmp_ctrq, [$counter] @ store the updated counter 1273 and $bit_length, $bit_length, #127 @ bit_length %= 128 1274 1275 sub $bit_length, $bit_length, #128 @ bit_length -= 128 1276 1277 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 1278 1279 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 1280 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 1281 and 
$bit_length, $bit_length, #127 @ bit_length %= 128 1282 1283 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 1284 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 1285 cmp $bit_length, #64 1286 1287 csel $temp2_x, $temp1_x, $temp0_x, lt 1288 csel $temp3_x, $temp0_x, xzr, lt 1289 1290 mov $ctr0.d[1], $temp3_x 1291 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 1292 1293 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 1294 1295 rev64 $res0b, $res1b @ GHASH final block 1296 1297 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 1298 st1 { $res1b}, [$output_ptr] @ store all 16B 1299 1300 eor $res0b, $res0b, $t0.16b @ feed in partial tag 1301 1302 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 1303 1304 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 1305 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1306 ext $h1.16b, $h1.16b, $h1.16b, #8 1307 1308 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 1309 1310 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 1311 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 1312 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 1313 1314 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 1315 1316 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 1317 1318 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 1319 1320 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1321 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1322 1323 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 1324 1325 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 1326 1327 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1328 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1329 1330 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 1331 ext 
$acc_lb, $acc_lb, $acc_lb, #8 1332 rev64 $acc_lb, $acc_lb 1333 st1 { $acc_l.16b }, [$current_tag] 1334 lsr x0, $bit_length, #3 @ return sizes 1335 1336 ldp d10, d11, [sp, #16] 1337 ldp d12, d13, [sp, #32] 1338 ldp d14, d15, [sp, #48] 1339 ldp d8, d9, [sp], #80 1340 ret 1341 1342.L128_enc_ret: 1343 mov w0, #0x0 1344 ret 1345.size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel 1346___ 1347 1348######################################################################################### 1349# size_t unroll8_eor3_aes_gcm_dec_128_kernel(const unsigned char *in, 1350# size_t len, 1351# unsigned char *out, 1352# u64 *Xi, 1353# unsigned char ivec[16], 1354# const void *key); 1355# 1356$code.=<<___; 1357.global unroll8_eor3_aes_gcm_dec_128_kernel 1358.type unroll8_eor3_aes_gcm_dec_128_kernel,%function 1359.align 4 1360unroll8_eor3_aes_gcm_dec_128_kernel: 1361 AARCH64_VALID_CALL_TARGET 1362 cbz x1, .L128_dec_ret 1363 stp d8, d9, [sp, #-80]! 1364 mov $counter, x4 1365 mov $cc, x5 1366 stp d10, d11, [sp, #16] 1367 stp d12, d13, [sp, #32] 1368 stp d14, d15, [sp, #48] 1369 mov x5, #0xc200000000000000 1370 stp x5, xzr, [sp, #64] 1371 add $modulo_constant, sp, #64 1372 1373 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 1374 ld1 { $ctr0b}, [$counter] @ CTR block 0 1375 1376 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1377 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 1378 1379 mov $constant_temp, #0x100000000 @ set up counter increment 1380 movi $rctr_inc.16b, #0x0 1381 mov $rctr_inc.d[1], $constant_temp 1382 ld1 { $acc_lb}, [$current_tag] 1383 ext $acc_lb, $acc_lb, $acc_lb, #8 1384 rev64 $acc_lb, $acc_lb 1385 1386 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 1387 1388 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 1389 1390 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 1391 1392 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 1393 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 1394 
1395 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 1396 1397 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 1398 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 1399 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 1400 1401 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 1402 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 1403 1404 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 1405 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 1406 1407 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 1408 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 1409 1410 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 1411 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 1412 1413 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 1414 1415 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 1416 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 1417 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 1418 1419 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 1420 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 1421 1422 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 1423 1424 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 1425 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 1426 1427 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 1428 1429 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1430 1431 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 1432 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 1433 1434 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 1435 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 1436 1437 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 1438 aese $ctr0b, $rk2 \n 
aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 1439 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 1440 1441 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 1442 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 1443 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 1444 1445 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 1446 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 1447 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 1448 1449 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 1450 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 1451 1452 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1453 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 1454 1455 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 1456 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 1457 1458 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 1459 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 1460 1461 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 1462 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 1463 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 1464 1465 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 1466 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 1467 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 1468 1469 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 1470 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 1471 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 1472 1473 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 1474 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 1475 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 1476 1477 aese $ctr6b, $rk5 
\n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 1478 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 1479 1480 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 1481 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 1482 1483 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 1484 1485 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 1486 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 1487 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 1488 1489 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 1490 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 1491 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 1492 1493 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 1494 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 1495 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 1496 1497 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 1498 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 1499 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 1500 1501 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 1502 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 1503 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 1504 1505 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 1506 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 1507 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 1508 1509 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 1510 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 1511 1512 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 1513 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 1514 1515 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 1516 aese 
$ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 1517 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 1518 1519 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 1520 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 1521 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 1522 1523 aese $ctr0b, $rk9 @ AES block 0 - round 9 1524 aese $ctr1b, $rk9 @ AES block 1 - round 9 1525 aese $ctr6b, $rk9 @ AES block 6 - round 9 1526 1527 ldr $rk10q, [$cc, #160] @ load rk10 1528 aese $ctr4b, $rk9 @ AES block 4 - round 9 1529 aese $ctr3b, $rk9 @ AES block 3 - round 9 1530 1531 aese $ctr2b, $rk9 @ AES block 2 - round 9 1532 aese $ctr5b, $rk9 @ AES block 5 - round 9 1533 aese $ctr7b, $rk9 @ AES block 7 - round 9 1534 1535 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 1536 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 1537 b.ge .L128_dec_tail @ handle tail 1538 1539 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 1540 1541 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 0 - result 1542 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 1 - result 1543 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result 1544 1545 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 1546 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 1547 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 1548 1549 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 1550 1551 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 1552 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 1553 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 1554 1555 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 3 - result 1556 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 2 - result 1557 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 1558 1559 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 
10 1560 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 1561 1562 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 6 - result 1563 1564 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 1565 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 1566 1567 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 4 - result 1568 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 5 - result 1569 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 1570 1571 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 7 - result 1572 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 1573 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 1574 1575 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 1576 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 1577 b.ge .L128_dec_prepretail @ do prepretail 1578 1579.L128_dec_main_loop: @ main loop start 1580 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 1581 ext $h7.16b, $h7.16b, $h7.16b, #8 1582 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 1583 ext $h8.16b, $h8.16b, $h8.16b, #8 1584 1585 rev64 $res1b, $res1b @ GHASH block 8k+1 1586 rev64 $res0b, $res0b @ GHASH block 8k 1587 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1588 1589 rev64 $res6b, $res6b @ GHASH block 8k+6 1590 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 1591 ext $h5.16b, $h5.16b, $h5.16b, #8 1592 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 1593 ext $h6.16b, $h6.16b, $h6.16b, #8 1594 1595 eor $res0b, $res0b, $acc_lb @ PRE 1 1596 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 1597 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 1598 1599 rev64 $res2b, $res2b @ GHASH block 8k+2 1600 rev64 $res4b, $res4b @ GHASH block 8k+4 1601 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1602 1603 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 1604 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 1605 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 1606 ldr $h78kq, [$current_tag, #192] @ load h8k | 
h7k 1607 1608 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 1609 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 1610 rev64 $res3b, $res3b @ GHASH block 8k+3 1611 1612 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 1613 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1614 rev64 $res5b, $res5b @ GHASH block 8k+5 1615 1616 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 1617 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 1618 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1619 1620 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 1621 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 1622 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 1623 1624 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 1625 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 1626 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 1627 1628 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 1629 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 1630 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 1631 1632 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 1633 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 1634 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 1635 1636 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 1637 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 1638 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 1639 1640 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1641 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1642 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 1643 1644 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 1645 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1646 pmull2 $acc_m.1q, 
$res0.2d, $h78k.2d @ GHASH block 8k - mid 1647 1648 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1649 ext $h3.16b, $h3.16b, $h3.16b, #8 1650 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1651 ext $h4.16b, $h4.16b, $h4.16b, #8 1652 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 1653 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 1654 1655 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 1656 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 1657 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 1658 1659 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 1660 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 1661 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 1662 1663 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 1664 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 1665 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 1666 1667 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 1668 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1669 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1670 ext $h1.16b, $h1.16b, $h1.16b, #8 1671 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1672 ext $h2.16b, $h2.16b, $h2.16b, #8 1673 1674 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 1675 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 1676 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 1677 1678 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1679 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 1680 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 1681 1682 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 1683 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 1684 pmull $h56k.1q, $res2.1d, $h56k.1d 
@ GHASH block 8k+3 - mid 1685 1686 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 1687 rev64 $res7b, $res7b @ GHASH block 8k+7 1688 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 1689 1690 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1691 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 1692 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1693 1694 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1695 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1696 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 1697 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1698 1699 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 1700 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 1701 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 1702 1703 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 1704 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 1705 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 1706 1707 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 1708 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 1709 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 1710 1711 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 1712 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 1713 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 1714 1715 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1716 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1717 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 1718 1719 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 1720 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 1721 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 1722 1723 aese $ctr2b, 
$rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 1724 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 1725 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1726 1727 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 1728 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 1729 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 1730 1731 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 1732 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 1733 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 1734 1735 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 1736 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 1737 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 1738 1739 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 1740 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 1741 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 1742 1743 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 1744 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1745 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 1746 1747 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 1748 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 1749 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 1750 1751 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 1752 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 1753 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 1754 1755 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 1756 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 1757 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 1758 1759 pmull $h12k.1q, $res6.1d, 
$h12k.1d @ GHASH block 8k+7 - mid 1760 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 1761 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 1762 1763 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 1764 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 1765 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 1766 1767 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 1768 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 1769 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 1770 1771 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 1772 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 1773 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 1774 1775 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 1776 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 1777 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 1778 1779 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 1780 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 1781 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 1782 1783 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 1784 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 1785 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 1786 1787 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 1788 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 1789 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 1790 1791 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 1792 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 1793 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 1794 1795 ldp $res2q, $res3q, [$input_ptr], #32 @ 
AES block 8k+10, 8k+11 - load ciphertext 1796 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 1797 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 1798 1799 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext 1800 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 1801 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 1802 1803 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 1804 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 1805 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 1806 1807 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 1808 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 1809 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 1810 1811 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 1812 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 1813 ldr $rk10q, [$cc, #160] @ load rk10 1814 1815 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 1816 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 1817 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 1818 1819 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 1820 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 1821 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 1822 1823 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 1824 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 1825 1826 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 1827 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 1828 eor3 $ctr1b, $res1b, $ctr1b, $rk10 @ AES block 8k+9 - result 1829 1830 eor3 $ctr0b, $res0b, $ctr0b, $rk10 @ AES block 8k+8 - result 1831 eor3 $ctr7b, $res7b, $ctr7b, $rk10 @ AES block 8k+15 - result 1832 eor3 $ctr6b, $res6b, $ctr6b, $rk10 @ AES block 8k+14 - result 1833 1834 eor3 $ctr2b, $res2b, $ctr2b, $rk10 @ AES block 8k+10 - result 1835 stp $ctr0q, $ctr1q, [$output_ptr], 
#32 @ AES block 8k+8, 8k+9 - store result 1836 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 1837 1838 eor3 $ctr4b, $res4b, $ctr4b, $rk10 @ AES block 8k+12 - result 1839 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 1840 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 1841 1842 eor3 $ctr3b, $res3b, $ctr3b, $rk10 @ AES block 8k+11 - result 1843 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 1844 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 1845 1846 eor3 $ctr5b, $res5b, $ctr5b, $rk10 @ AES block 8k+13 - result 1847 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 1848 1849 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 1850 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 1851 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 1852 1853 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 1854 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 1855 b.lt .L128_dec_main_loop 1856 1857.L128_dec_prepretail: @ PREPRETAIL 1858 rev64 $res3b, $res3b @ GHASH block 8k+3 1859 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 1860 rev64 $res0b, $res0b @ GHASH block 8k 1861 1862 rev64 $res2b, $res2b @ GHASH block 8k+2 1863 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 1864 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 1865 1866 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 1867 ext $h7.16b, $h7.16b, $h7.16b, #8 1868 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 1869 ext $h8.16b, $h8.16b, $h8.16b, #8 1870 eor $res0b, $res0b, $acc_lb @ PRE 1 1871 rev64 $res1b, $res1b @ GHASH block 8k+1 1872 1873 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 1874 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 1875 ext $h5.16b, $h5.16b, $h5.16b, #8 1876 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 1877 ext $h6.16b, $h6.16b, $h6.16b, #8 1878 rev64 $res5b, $res5b @ GHASH block 8k+5 1879 1880 rev64 $res4b, $res4b @ GHASH block 8k+4 1881 1882 rev64 $res6b, $res6b @ GHASH block 8k+6 1883 
1884 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 1885 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 1886 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 1887 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 1888 1889 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 1890 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 1891 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 1892 1893 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1894 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 1895 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 1896 1897 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 1898 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 1899 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 1900 1901 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 1902 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 1903 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 1904 1905 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 1906 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 1907 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 1908 1909 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 1910 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1911 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 1912 1913 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 1914 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 1915 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 1916 1917 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 1918 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 1919 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 1920 1921 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 1922 aese $ctr7b, $rk0 \n aesmc 
$ctr7b, $ctr7b @ AES block 8k+15 - round 0 1923 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 1924 1925 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 1926 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1927 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 1928 1929 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 1930 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 1931 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 1932 1933 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 1934 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 1935 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 1936 1937 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 1938 ext $h3.16b, $h3.16b, $h3.16b, #8 1939 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 1940 ext $h4.16b, $h4.16b, $h4.16b, #8 1941 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 1942 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 1943 1944 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 1945 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 1946 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 1947 1948 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 1949 ext $h1.16b, $h1.16b, $h1.16b, #8 1950 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 1951 ext $h2.16b, $h2.16b, $h2.16b, #8 1952 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 1953 1954 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 1955 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 1956 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 1957 1958 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 1959 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1960 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 
8k+15 - round 2 1961 1962 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 1963 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 1964 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 1965 1966 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 1967 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 1968 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 1969 1970 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 1971 rev64 $res7b, $res7b @ GHASH block 8k+7 1972 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 1973 1974 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 1975 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 1976 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 1977 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 1978 1979 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 1980 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 1981 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1982 1983 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 1984 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 1985 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 1986 1987 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 1988 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 1989 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 1990 1991 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 1992 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 1993 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 1994 1995 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 1996 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 1997 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 1998 1999 eor $res6.16b, $res6.16b, 
$t9.16b @ GHASH block 8k+6, 8k+7 - mid 2000 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 2001 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 2002 2003 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 2004 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 2005 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 2006 2007 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 2008 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 2009 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 2010 2011 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 2012 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 2013 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 2014 2015 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2016 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 2017 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 2018 2019 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2020 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 2021 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 2022 2023 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 2024 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 2025 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 2026 2027 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 2028 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 2029 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 2030 2031 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 2032 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2033 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 2034 2035 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES 
block 8k+12 - round 6 2036 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 2037 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 2038 2039 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 2040 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 2041 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 2042 2043 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 2044 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 2045 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 2046 2047 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 2048 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2049 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2050 2051 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2052 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 2053 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2054 2055 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 2056 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 2057 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 2058 2059 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 2060 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 2061 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 2062 2063 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 2064 ldr $rk10q, [$cc, #160] @ load rk10 2065 2066 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 2067 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 2068 2069 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2070 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 2071 ext $t11.16b, $acc_mb, 
$acc_mb, #8 @ MODULO - other mid alignment 2072 2073 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 2074 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 2075 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 2076 2077 aese $ctr6b, $rk9 @ AES block 8k+14 - round 9 2078 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 2079 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 2080 2081 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 2082 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 2083 aese $ctr2b, $rk9 @ AES block 8k+10 - round 9 2084 2085 aese $ctr3b, $rk9 @ AES block 8k+11 - round 9 2086 aese $ctr5b, $rk9 @ AES block 8k+13 - round 9 2087 aese $ctr0b, $rk9 @ AES block 8k+8 - round 9 2088 2089 aese $ctr4b, $rk9 @ AES block 8k+12 - round 9 2090 aese $ctr1b, $rk9 @ AES block 8k+9 - round 9 2091 aese $ctr7b, $rk9 @ AES block 8k+15 - round 9 2092 2093.L128_dec_tail: @ TAIL 2094 2095 mov $t1.16b, $rk10 2096 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 2097 2098 cmp $main_end_input_ptr, #112 2099 2100 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 2101 ext $h8.16b, $h8.16b, $h8.16b, #8 2102 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 2103 2104 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 2105 ext $h5.16b, $h5.16b, $h5.16b, #8 2106 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 2107 2108 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 2109 ext $h6.16b, $h6.16b, $h6.16b, #8 2110 ext $h7.16b, $h7.16b, $h7.16b, #8 2111 2112 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 2113 b.gt .L128_dec_blocks_more_than_7 2114 2115 cmp $main_end_input_ptr, #96 2116 mov $ctr7b, $ctr6b 2117 movi $acc_l.8b, #0 2118 2119 movi $acc_h.8b, #0 2120 mov $ctr6b, $ctr5b 2121 mov $ctr5b, $ctr4b 2122 2123 mov $ctr4b, $ctr3b 2124 
mov $ctr3b, $ctr2b 2125 mov $ctr2b, $ctr1b 2126 2127 movi $acc_m.8b, #0 2128 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2129 b.gt .L128_dec_blocks_more_than_6 2130 2131 cmp $main_end_input_ptr, #80 2132 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2133 2134 mov $ctr7b, $ctr6b 2135 mov $ctr6b, $ctr5b 2136 mov $ctr5b, $ctr4b 2137 2138 mov $ctr4b, $ctr3b 2139 mov $ctr3b, $ctr1b 2140 b.gt .L128_dec_blocks_more_than_5 2141 2142 cmp $main_end_input_ptr, #64 2143 2144 mov $ctr7b, $ctr6b 2145 mov $ctr6b, $ctr5b 2146 mov $ctr5b, $ctr4b 2147 2148 mov $ctr4b, $ctr1b 2149 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2150 b.gt .L128_dec_blocks_more_than_4 2151 2152 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2153 mov $ctr7b, $ctr6b 2154 mov $ctr6b, $ctr5b 2155 2156 mov $ctr5b, $ctr1b 2157 cmp $main_end_input_ptr, #48 2158 b.gt .L128_dec_blocks_more_than_3 2159 2160 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2161 mov $ctr7b, $ctr6b 2162 cmp $main_end_input_ptr, #32 2163 2164 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2165 mov $ctr6b, $ctr1b 2166 b.gt .L128_dec_blocks_more_than_2 2167 2168 cmp $main_end_input_ptr, #16 2169 2170 mov $ctr7b, $ctr1b 2171 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2172 b.gt .L128_dec_blocks_more_than_1 2173 2174 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 2175 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2176 b .L128_dec_blocks_less_than_1 2177.L128_dec_blocks_more_than_7: @ blocks left > 7 2178 rev64 $res0b, $res1b @ GHASH final-7 block 2179 2180 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2181 2182 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 2183 2184 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 2185 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 2186 2187 movi $t0.8b, #0 @ suppress further partial tag feed in 2188 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 2189 2190 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 2191 2192 pmull2 $acc_h.1q, $res0.2d, 
$h8.2d @ GHASH final-7 block - high 2193 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 2194 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 2195 2196 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 2197.L128_dec_blocks_more_than_6: @ blocks left > 6 2198 2199 rev64 $res0b, $res1b @ GHASH final-6 block 2200 2201 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2202 2203 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 2204 2205 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 2206 2207 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 2208 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 2209 movi $t0.8b, #0 @ supress further partial tag feed in 2210 2211 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 2212 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 2213 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 2214 2215 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 2216 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 2217 2218 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 2219 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 2220.L128_dec_blocks_more_than_5: @ blocks left > 5 2221 2222 rev64 $res0b, $res1b @ GHASH final-5 block 2223 2224 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 2225 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 2226 2227 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2228 2229 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 2230 2231 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 2232 2233 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 2234 2235 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 2236 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 2237 movi $t0.8b, #0 @ supress further partial tag feed in 2238 2239 pmull2 $rk4v.1q, $rk4v.2d, 
$h56k.2d @ GHASH final-5 block - mid 2240 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 2241 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 2242 2243 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 2244 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 2245.L128_dec_blocks_more_than_4: @ blocks left > 4 2246 2247 rev64 $res0b, $res1b @ GHASH final-4 block 2248 2249 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2250 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 2251 2252 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 2253 movi $t0.8b, #0 @ supress further partial tag feed in 2254 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 2255 2256 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 2257 2258 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 2259 2260 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 2261 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 2262 2263 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 2264 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 2265 2266 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 2267 2268 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 2269.L128_dec_blocks_more_than_3: @ blocks left > 3 2270 2271 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 2272 rev64 $res0b, $res1b @ GHASH final-3 block 2273 2274 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2275 2276 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 2277 2278 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 2279 ext $h4.16b, $h4.16b, $h4.16b, #8 2280 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2281 2282 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 2283 2284 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 2285 2286 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 2287 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH 
final-3 block - low 2288 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 2289 2290 movi $t0.8b, #0 @ supress further partial tag feed in 2291 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 2292 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 2293 2294 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 2295 2296 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 2297 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 2298.L128_dec_blocks_more_than_2: @ blocks left > 2 2299 2300 rev64 $res0b, $res1b @ GHASH final-2 block 2301 2302 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 2303 2304 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2305 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 2306 ext $h3.16b, $h3.16b, $h3.16b, #8 2307 movi $t0.8b, #0 @ supress further partial tag feed in 2308 2309 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 2310 2311 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 2312 2313 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 2314 2315 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 2316 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 2317 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 2318 2319 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 2320 2321 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 2322 2323 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 2324 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 2325.L128_dec_blocks_more_than_1: @ blocks left > 1 2326 2327 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 2328 rev64 $res0b, $res1b @ GHASH final-1 block 2329 2330 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 2331 ext $h2.16b, $h2.16b, $h2.16b, #8 2332 2333 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2334 2335 movi $t0.8b, #0 @ supress further partial tag feed in 2336 2337 ins $rk4v.d[0], 
$res0.d[1] @ GHASH final-1 block - mid 2338 2339 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 2340 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 2341 2342 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 2343 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 2344 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2345 2346 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 2347 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 2348 2349 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 2350 2351 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 2352 2353 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 2354 2355 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 2356.L128_dec_blocks_less_than_1: @ blocks left <= 1 2357 2358 and $bit_length, $bit_length, #127 @ bit_length %= 128 2359 2360 sub $bit_length, $bit_length, #128 @ bit_length -= 128 2361 2362 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 2363 2364 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 2365 and $bit_length, $bit_length, #127 @ bit_length %= 128 2366 2367 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 2368 cmp $bit_length, #64 2369 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 2370 2371 csel $temp2_x, $temp1_x, $temp0_x, lt 2372 csel $temp3_x, $temp0_x, xzr, lt 2373 2374 mov $ctr0.d[1], $temp3_x 2375 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 2376 2377 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 2378 ext $h1.16b, $h1.16b, $h1.16b, #8 2379 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 2380 2381 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 2382 2383 rev64 $res0b, $res1b @ GHASH final block 2384 2385 eor $res0b, $res0b, $t0.16b @ feed in partial tag 2386 2387 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - 
high 2388 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 2389 2390 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 2391 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 2392 2393 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 2394 2395 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 2396 st1 { $res4b}, [$output_ptr] @ store all 16B 2397 2398 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 2399 2400 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 2401 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2402 2403 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 2404 2405 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2406 2407 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2408 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2409 2410 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up 2411 2412 eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid 2413 2414 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 2415 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 2416 2417 eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low 2418 ext $acc_lb, $acc_lb, $acc_lb, #8 2419 rev64 $acc_lb, $acc_lb 2420 st1 { $acc_l.16b }, [$current_tag] 2421 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 2422 2423 str $rtmp_ctrq, [$counter] @ store the updated counter 2424 2425 lsr x0, $bit_length, #3 2426 2427 ldp d10, d11, [sp, #16] 2428 ldp d12, d13, [sp, #32] 2429 ldp d14, d15, [sp, #48] 2430 ldp d8, d9, [sp], #80 2431 ret 2432.L128_dec_ret: 2433 mov w0, #0x0 2434 ret 2435.size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel 2436___ 2437} 2438 2439{ 2440my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7)); 2441my ($temp2_x,$temp3_x)=map("x$_",(13..14)); 2442my 
($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15)); 2443my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15)); 2444my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7)); 2445my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7)); 2446my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15)); 2447 2448my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15)); 2449my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15)); 2450my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15)); 2451 2452my ($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19)); 2453my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19)); 2454 2455my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25)); 2456my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25)); 2457my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25)); 2458my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25)); 2459 2460my $t0="v16"; 2461my $t0d="d16"; 2462 2463my $t1="v29"; 2464my $t2=$res1; 2465my $t3=$t1; 2466 2467my $t4=$res0; 2468my $t5=$res2; 2469my $t6=$t0; 2470 2471my $t7=$res3; 2472my $t8=$res4; 2473my $t9=$res5; 2474 2475my $t10=$res6; 2476my $t11="v21"; 2477my $t12=$t1; 2478 2479my $rtmp_ctr="v30"; 2480my $rtmp_ctrq="q30"; 2481my $rctr_inc="v31"; 2482my $rctr_incd="d31"; 2483 2484my $mod_constantd=$t0d; 2485my $mod_constant=$t0; 2486 2487my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28)); 2488my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28)); 2489my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28)); 2490my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28)); 2491my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28)); 2492my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28)); 2493my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28)); 2494my 
($rk6q,$rk7q,$rk8q)=map("q$_",(26..28)); 2495my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28)); 2496my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28)); 2497my $rk2q1="v28.1q"; 2498my $rk3q1="v26.1q"; 2499my $rk4v="v27"; 2500 2501######################################################################################### 2502# size_t unroll8_eor3_aes_gcm_enc_192_kernel(const unsigned char *in, 2503# size_t len, 2504# unsigned char *out, 2505# const void *key, 2506# unsigned char ivec[16], 2507# u64 *Xi); 2508# 2509$code.=<<___; 2510.global unroll8_eor3_aes_gcm_enc_192_kernel 2511.type unroll8_eor3_aes_gcm_enc_192_kernel,%function 2512.align 4 2513unroll8_eor3_aes_gcm_enc_192_kernel: 2514 AARCH64_VALID_CALL_TARGET 2515 cbz x1, .L192_enc_ret 2516 stp d8, d9, [sp, #-80]! 2517 mov $counter, x4 2518 mov $cc, x5 2519 stp d10, d11, [sp, #16] 2520 stp d12, d13, [sp, #32] 2521 stp d14, d15, [sp, #48] 2522 mov x5, #0xc200000000000000 2523 stp x5, xzr, [sp, #64] 2524 add $modulo_constant, sp, #64 2525 2526 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 2527 ld1 { $ctr0b}, [$counter] @ CTR block 0 2528 2529 mov $constant_temp, #0x100000000 @ set up counter increment 2530 movi $rctr_inc.16b, #0x0 2531 mov $rctr_inc.d[1], $constant_temp 2532 2533 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 2534 2535 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 2536 2537 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 2538 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 2539 2540 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 2541 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 2542 2543 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 2544 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 2545 2546 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 2547 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 2548 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 2549 2550 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number 
of bytes to be processed in main loop (at least 1 byte must be handled by tail) 2551 2552 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 2553 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 2554 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 2555 2556 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 2557 2558 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 2559 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 2560 2561 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 2562 2563 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 2564 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 2565 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 2566 2567 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 2568 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 2569 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 2570 2571 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 2572 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 2573 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 2574 2575 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 2576 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 2577 2578 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 2579 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 2580 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 2581 2582 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 2583 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 2584 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 2585 2586 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 2587 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 2588 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 2589 2590 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 
2591 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 2592 2593 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 2594 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 2595 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 2596 2597 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 2598 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 2599 2600 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 2601 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 2602 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 2603 2604 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 2605 2606 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 2607 2608 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 2609 2610 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 2611 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 2612 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 2613 2614 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 2615 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 2616 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 2617 2618 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 2619 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 2620 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 2621 2622 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 2623 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2624 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 2625 2626 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 2627 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 2628 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 2629 2630 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - 
round 5 2631 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 2632 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 2633 2634 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 2635 2636 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 2637 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 2638 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 2639 2640 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 2641 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 2642 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 2643 2644 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 2645 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 2646 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2647 2648 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 2649 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 2650 2651 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 2652 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 2653 2654 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 2655 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 2656 2657 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 2658 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 2659 2660 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 2661 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 2662 2663 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 2664 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 2665 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 2666 2667 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 2668 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 2669 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b 
@ AES block 6 - round 8 2670 2671 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 2672 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 2673 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 2674 2675 ld1 { $acc_lb}, [$current_tag] 2676 ext $acc_lb, $acc_lb, $acc_lb, #8 2677 rev64 $acc_lb, $acc_lb 2678 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 2679 2680 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 2681 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 2682 2683 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 2684 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 2685 2686 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 2687 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 2688 2689 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 14 - round 10 2690 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 2691 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 11 - round 10 2692 2693 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 9 - round 10 2694 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 13 - round 10 2695 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 12 - round 10 2696 2697 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8 - round 10 2698 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 10 - round 10 2699 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 15 - round 10 2700 2701 aese $ctr6b, $rk11 @ AES block 14 - round 11 2702 aese $ctr3b, $rk11 @ AES block 11 - round 11 2703 2704 aese $ctr4b, $rk11 @ AES block 12 - round 11 2705 aese $ctr7b, $rk11 @ AES block 15 - round 11 2706 ldr $rk12q, [$cc, #192] @ load rk12 2707 2708 aese $ctr1b, $rk11 @ AES block 9 - round 11 2709 aese $ctr5b, $rk11 @ AES block 13 - round 11 2710 2711 aese $ctr2b, $rk11 @ AES block 10 - round 11 2712 aese $ctr0b, $rk11 @ AES block 8 - round 11 2713 b.ge 
.L192_enc_tail @ handle tail 2714 2715 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 2716 2717 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext 2718 2719 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 2720 2721 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 2722 2723 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 0 - result 2724 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 2725 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 2726 2727 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 3 - result 2728 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 1 - result 2729 2730 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 2731 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 2732 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 4 - result 2733 2734 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 5 - result 2735 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 7 - result 2736 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 2737 2738 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 2 - result 2739 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 2740 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 2741 2742 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 2743 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 2744 2745 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 2746 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 2747 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 6 - result 2748 2749 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 2750 2751 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 2752 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 2753 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 2754 2755 b.ge .L192_enc_prepretail @ do prepretail 2756 2757.L192_enc_main_loop: @ main loop start 2758 
rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 2759 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 2760 rev64 $res2b, $res2b @ GHASH block 8k+2 2761 2762 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 2763 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 2764 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 2765 ext $h7.16b, $h7.16b, $h7.16b, #8 2766 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 2767 ext $h8.16b, $h8.16b, $h8.16b, #8 2768 2769 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 2770 rev64 $res0b, $res0b @ GHASH block 8k 2771 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 2772 ext $h5.16b, $h5.16b, $h5.16b, #8 2773 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 2774 ext $h6.16b, $h6.16b, $h6.16b, #8 2775 2776 rev64 $res1b, $res1b @ GHASH block 8k+1 2777 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 2778 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 2779 2780 eor $res0b, $res0b, $acc_lb @ PRE 1 2781 rev64 $res3b, $res3b @ GHASH block 8k+3 2782 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 2783 2784 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 2785 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 2786 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 2787 2788 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 2789 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 2790 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 2791 2792 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 2793 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 2794 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 2795 2796 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 2797 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 2798 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 2799 2800 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES 
block 8k+12 - round 1 2801 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 2802 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 2803 2804 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 2805 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 2806 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 2807 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 2808 2809 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 2810 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 2811 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 2812 2813 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 2814 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 2815 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 2816 2817 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 2818 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 2819 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 2820 2821 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 2822 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 2823 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 2824 2825 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 2826 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 2827 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 2828 2829 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 2830 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 2831 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 2832 2833 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 2834 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 2835 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 2836 2837 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES 
block 8k+10 - round 2 2838 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 2839 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 2840 2841 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 2842 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 2843 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 2844 2845 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 2846 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 2847 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 2848 ext $h3.16b, $h3.16b, $h3.16b, #8 2849 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 2850 ext $h4.16b, $h4.16b, $h4.16b, #8 2851 2852 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 2853 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 2854 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 2855 2856 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 2857 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 2858 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 2859 2860 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 2861 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 2862 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 2863 2864 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 2865 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 2866 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 2867 2868 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 2869 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 2870 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 2871 2872 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 2873 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 2874 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 2875 2876 aese 
$ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 2877 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 2878 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 2879 2880 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 2881 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 2882 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 2883 ext $h1.16b, $h1.16b, $h1.16b, #8 2884 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 2885 ext $h2.16b, $h2.16b, $h2.16b, #8 2886 2887 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 2888 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 2889 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 2890 2891 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 2892 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 2893 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 2894 2895 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 2896 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 2897 2898 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 2899 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 2900 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 2901 2902 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 2903 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 2904 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 2905 2906 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 2907 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 2908 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 2909 2910 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 2911 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 2912 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 2913 2914 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES 
block 8k+8 - round 6 2915 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 2916 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 2917 2918 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 2919 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 2920 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 2921 2922 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 2923 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 2924 2925 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 2926 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 2927 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 2928 2929 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 2930 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2931 2932 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 2933 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 2934 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 2935 2936 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 2937 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 2938 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 2939 2940 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 2941 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 2942 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 2943 2944 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 2945 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 2946 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 2947 2948 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 2949 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 2950 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 2951 2952 aese $ctr5b, $rk8 
\n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 2953 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 2954 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 2955 2956 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 2957 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 2958 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 2959 2960 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 2961 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 2962 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 2963 2964 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 2965 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 2966 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 2967 2968 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 2969 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 2970 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 2971 2972 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 2973 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 2974 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 2975 2976 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 2977 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 2978 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 2979 2980 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 2981 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 2982 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 2983 2984 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 2985 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 2986 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 2987 2988 eor3 
$acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 2989 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 2990 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 2991 2992 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 2993 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 2994 ldr $rk12q, [$cc, #192] @ load rk12 2995 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 2996 2997 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 2998 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 2999 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 3000 3001 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 3002 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3003 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load plaintext 3004 3005 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load plaintext 3006 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 3007 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 3008 3009 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 3010 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 3011 3012 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 3013 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3014 3015 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 3016 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 3017 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 3018 3019 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 3020 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 3021 eor3 $res4b, $ctr_t4b, $ctr4b, $rk12 @ AES block 4 - result 3022 3023 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 3024 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 3025 aese $ctr1b, $rk11 @ AES block 8k+9 - 
round 11 3026 3027 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 3028 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 3029 eor3 $res7b, $ctr_t7b, $ctr7b, $rk12 @ AES block 7 - result 3030 3031 eor3 $res2b, $ctr_t2b, $ctr2b, $rk12 @ AES block 8k+10 - result 3032 eor3 $res0b, $ctr_t0b, $ctr0b, $rk12 @ AES block 8k+8 - result 3033 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 3034 3035 eor3 $res1b, $ctr_t1b, $ctr1b, $rk12 @ AES block 8k+9 - result 3036 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 3037 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 3038 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3039 3040 eor3 $res6b, $ctr_t6b, $ctr6b, $rk12 @ AES block 6 - result 3041 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 3042 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 3043 3044 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 3045 eor3 $res5b, $ctr_t5b, $ctr5b, $rk12 @ AES block 5 - result 3046 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 3047 3048 eor3 $res3b, $ctr_t3b, $ctr3b, $rk12 @ AES block 8k+11 - result 3049 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 3050 3051 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 3052 3053 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 3054 3055 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 3056 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 3057 b.lt .L192_enc_main_loop 3058 3059.L192_enc_prepretail: @ PREPRETAIL 3060 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 3061 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3062 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 3063 3064 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 3065 ext $h7.16b, $h7.16b, $h7.16b, #8 3066 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 3067 ext $h8.16b, $h8.16b, $h8.16b, #8 3068 rev64 $res0b, $res0b @ GHASH block 8k 3069 ext $acc_lb, $acc_lb, $acc_lb, #8 @ 
PRE 0 3070 3071 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 3072 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 3073 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 3074 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 3075 3076 rev64 $res3b, $res3b @ GHASH block 8k+3 3077 rev64 $res2b, $res2b @ GHASH block 8k+2 3078 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 3079 ext $h5.16b, $h5.16b, $h5.16b, #8 3080 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 3081 ext $h6.16b, $h6.16b, $h6.16b, #8 3082 3083 eor $res0b, $res0b, $acc_lb @ PRE 1 3084 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 3085 rev64 $res1b, $res1b @ GHASH block 8k+1 3086 3087 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 3088 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 3089 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 3090 3091 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 3092 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 3093 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 3094 3095 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 3096 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 3097 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 3098 3099 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 3100 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 3101 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3102 3103 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3104 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 3105 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3106 3107 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 3108 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 3109 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 3110 3111 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ 
AES block 8k+13 - round 1 3112 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 3113 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 3114 3115 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 3116 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 3117 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 3118 3119 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 3120 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 3121 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 3122 3123 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 3124 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 3125 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 3126 3127 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 3128 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 3129 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 3130 3131 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 3132 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 3133 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 3134 3135 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 3136 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 3137 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 3138 3139 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 3140 rev64 $res5b, $res5b @ GHASH block 8k+5 (t0, t1, t2 and t3 free) 3141 rev64 $res6b, $res6b @ GHASH block 8k+6 (t0, t1, and t2 free) 3142 3143 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 3144 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 3145 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 3146 3147 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 3148 pmull 
$h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 3149 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 3150 3151 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 3152 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 3153 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 3154 3155 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 3156 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 3157 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 3158 3159 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 3160 ext $h3.16b, $h3.16b, $h3.16b, #8 3161 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 3162 ext $h4.16b, $h4.16b, $h4.16b, #8 3163 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 3164 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 3165 3166 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 3167 ext $h1.16b, $h1.16b, $h1.16b, #8 3168 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 3169 ext $h2.16b, $h2.16b, $h2.16b, #8 3170 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 3171 rev64 $res4b, $res4b @ GHASH block 8k+4 (t0, t1, and t2 free) 3172 3173 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 3174 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 3175 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 3176 3177 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 3178 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 3179 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 3180 3181 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 3182 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 3183 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 3184 3185 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 3186 aese $ctr4b, $rk4 \n aesmc $ctr4b, 
$ctr4b @ AES block 8k+12 - round 4 3187 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 3188 3189 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 3190 rev64 $res7b, $res7b @ GHASH block 8k+7 (t0, t1, t2 and t3 free) 3191 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3192 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3193 3194 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 3195 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 3196 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 3197 3198 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 3199 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 3200 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 3201 3202 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 3203 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 3204 3205 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 3206 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 3207 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 3208 3209 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 3210 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 3211 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 3212 3213 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 3214 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 3215 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 3216 3217 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 3218 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 3219 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 3220 3221 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 3222 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 3223 3224 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 3225 aese 
$ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 3226 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 3227 3228 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 3229 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 3230 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 3231 3232 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 3233 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 3234 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 3235 3236 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 3237 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 3238 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 3239 3240 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 3241 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 3242 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 3243 3244 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 3245 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 3246 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 3247 3248 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 3249 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 3250 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 3251 3252 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 3253 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 3254 3255 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 3256 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 3257 3258 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 3259 ext $t12.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3260 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 3261 
pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3262 3263 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 3264 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 3265 3266 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 3267 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 3268 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3269 3270 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 3271 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 3272 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 3273 3274 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 3275 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 3276 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 3277 3278 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 3279 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 3280 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 3281 3282 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 3283 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 3284 3285 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3286 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 3287 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 3288 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 3289 3290 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3291 ldr $rk12q, [$cc, #192] @ load rk12 3292 3293 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 3294 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 3295 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 3296 3297 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ 
MODULO - fold into low 3298 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 3299 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 3300 3301 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 3302 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 3303 3304 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 3305 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 3306 3307 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 3308 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 3309 3310 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 3311 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 3312 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 3313 3314 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 3315 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 3316 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 3317 3318.L192_enc_tail: @ TAIL 3319 3320 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 3321 ext $h5.16b, $h5.16b, $h5.16b, #8 3322 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 3323 3324 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - l3ad plaintext 3325 3326 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 3327 ext $h8.16b, $h8.16b, $h8.16b, #8 3328 3329 mov $t1.16b, $rk12 3330 3331 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 3332 ext $h6.16b, $h6.16b, $h6.16b, #8 3333 ext $h7.16b, $h7.16b, $h7.16b, #8 3334 cmp $main_end_input_ptr, #112 3335 3336 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result 3337 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 3338 b.gt .L192_enc_blocks_more_than_7 3339 3340 cmp $main_end_input_ptr, #96 3341 mov $ctr7b, $ctr6b 3342 movi $acc_h.8b, #0 3343 3344 mov $ctr6b, $ctr5b 3345 movi $acc_l.8b, #0 3346 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3347 3348 mov $ctr5b, $ctr4b 3349 mov $ctr4b, $ctr3b 3350 mov $ctr3b, 
$ctr2b 3351 3352 mov $ctr2b, $ctr1b 3353 movi $acc_m.8b, #0 3354 b.gt .L192_enc_blocks_more_than_6 3355 3356 mov $ctr7b, $ctr6b 3357 cmp $main_end_input_ptr, #80 3358 3359 mov $ctr6b, $ctr5b 3360 mov $ctr5b, $ctr4b 3361 mov $ctr4b, $ctr3b 3362 3363 mov $ctr3b, $ctr1b 3364 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3365 b.gt .L192_enc_blocks_more_than_5 3366 3367 cmp $main_end_input_ptr, #64 3368 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3369 3370 mov $ctr7b, $ctr6b 3371 mov $ctr6b, $ctr5b 3372 mov $ctr5b, $ctr4b 3373 3374 mov $ctr4b, $ctr1b 3375 b.gt .L192_enc_blocks_more_than_4 3376 3377 mov $ctr7b, $ctr6b 3378 mov $ctr6b, $ctr5b 3379 mov $ctr5b, $ctr1b 3380 3381 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3382 cmp $main_end_input_ptr, #48 3383 b.gt .L192_enc_blocks_more_than_3 3384 3385 mov $ctr7b, $ctr6b 3386 mov $ctr6b, $ctr1b 3387 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3388 3389 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3390 cmp $main_end_input_ptr, #32 3391 b.gt .L192_enc_blocks_more_than_2 3392 3393 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3394 3395 cmp $main_end_input_ptr, #16 3396 mov $ctr7b, $ctr1b 3397 b.gt .L192_enc_blocks_more_than_1 3398 3399 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 3400 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3401 b .L192_enc_blocks_less_than_1 3402.L192_enc_blocks_more_than_7: @ blocks left > 7 3403 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 3404 3405 rev64 $res0b, $res1b @ GHASH final-7 block 3406 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 3407 3408 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3409 3410 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 3411 3412 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 3413 3414 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 3415 movi $t0.8b, #0 @ supress further partial tag feed in 3416 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 3417 3418 pmull2 
$acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 3419 3420 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 3421 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 3422.L192_enc_blocks_more_than_6: @ blocks left > 6 3423 3424 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 3425 3426 rev64 $res0b, $res1b @ GHASH final-6 block 3427 3428 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 3429 3430 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3431 3432 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 3433 3434 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 3435 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 3436 3437 movi $t0.8b, #0 @ supress further partial tag feed in 3438 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 3439 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 3440 3441 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 3442 3443 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 3444 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 3445 3446 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 3447.L192_enc_blocks_more_than_5: @ blocks left > 5 3448 3449 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result 3450 3451 rev64 $res0b, $res1b @ GHASH final-5 block 3452 3453 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3454 3455 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 3456 3457 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 3458 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 3459 3460 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 3461 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 3462 3463 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 3464 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 3465 3466 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 3467 pmull2 
$rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 3468 3469 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 3470 movi $t0.8b, #0 @ supress further partial tag feed in 3471 3472 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 3473.L192_enc_blocks_more_than_4: @ blocks left > 4 3474 3475 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 3476 3477 rev64 $res0b, $res1b @ GHASH final-4 block 3478 3479 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3480 3481 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 3482 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 3483 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 3484 3485 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 3486 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 3487 3488 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 3489 3490 movi $t0.8b, #0 @ supress further partial tag feed in 3491 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 3492 3493 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 3494 3495 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 3496 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 3497.L192_enc_blocks_more_than_3: @ blocks left > 3 3498 3499 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 3500 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 3501 3502 rev64 $res0b, $res1b @ GHASH final-3 block 3503 3504 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3505 movi $t0.8b, #0 @ supress further partial tag feed in 3506 3507 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 3508 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 3509 ext $h4.16b, $h4.16b, $h4.16b, #8 3510 3511 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 3512 3513 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 3514 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 3515 
3516 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 3517 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 3518 3519 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 3520 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 3521 3522 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 3523 3524 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 3525 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 3526.L192_enc_blocks_more_than_2: @ blocks left > 2 3527 3528 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 3529 3530 rev64 $res0b, $res1b @ GHASH final-2 block 3531 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 3532 ext $h3.16b, $h3.16b, $h3.16b, #8 3533 3534 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3535 3536 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 3537 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 3538 3539 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 3540 3541 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 3542 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 3543 movi $t0.8b, #0 @ supress further partial tag feed in 3544 3545 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 3546 3547 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 3548 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 3549 3550 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 3551 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 block - result 3552.L192_enc_blocks_more_than_1: @ blocks left > 1 3553 3554 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 3555 ext $h2.16b, $h2.16b, $h2.16b, #8 3556 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 3557 3558 rev64 $res0b, $res1b @ GHASH final-1 block 3559 3560 eor $res0b, $res0b, $t0.16b @ feed in partial tag 3561 3562 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 3563 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block 
- low 3564 3565 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 3566 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 3567 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 3568 3569 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 3570 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 3571 3572 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 3573 3574 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 3575 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 3576 3577 movi $t0.8b, #0 @ supress further partial tag feed in 3578 3579 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 3580 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 3581.L192_enc_blocks_less_than_1: @ blocks left <= 1 3582 3583 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 3584 and $bit_length, $bit_length, #127 @ bit_length %= 128 3585 3586 sub $bit_length, $bit_length, #128 @ bit_length -= 128 3587 3588 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 3589 3590 and $bit_length, $bit_length, #127 @ bit_length %= 128 3591 3592 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 3593 cmp $bit_length, #64 3594 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 3595 3596 csel $temp2_x, $temp1_x, $temp0_x, lt 3597 csel $temp3_x, $temp0_x, xzr, lt 3598 3599 mov $ctr0.d[1], $temp3_x 3600 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 3601 ext $h1.16b, $h1.16b, $h1.16b, #8 3602 3603 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 3604 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 3605 3606 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 3607 3608 rev64 $res0b, $res1b @ GHASH final block 3609 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 3610 3611 st1 { $res1b}, [$output_ptr] @ store all 16B 3612 3613 eor 
$res0b, $res0b, $t0.16b @ feed in partial tag 3614 3615 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 3616 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 3617 3618 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 3619 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 3620 3621 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 3622 3623 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 3624 3625 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 3626 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 3627 3628 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 3629 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 3630 3631 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 3632 3633 str $rtmp_ctrq, [$counter] @ store the updated counter 3634 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 3635 3636 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 3637 3638 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 3639 3640 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 3641 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 3642 3643 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 3644 ext $acc_lb, $acc_lb, $acc_lb, #8 3645 rev64 $acc_lb, $acc_lb 3646 st1 { $acc_l.16b }, [$current_tag] 3647 3648 lsr x0, $bit_length, #3 @ return sizes 3649 3650 ldp d10, d11, [sp, #16] 3651 ldp d12, d13, [sp, #32] 3652 ldp d14, d15, [sp, #48] 3653 ldp d8, d9, [sp], #80 3654 ret 3655 3656.L192_enc_ret: 3657 mov w0, #0x0 3658 ret 3659.size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel 3660___ 3661 3662######################################################################################### 3663# size_t unroll8_eor3_aes_gcm_dec_192_kernel(const unsigned char *in, 3664# size_t len, 3665# unsigned char *out, 3666# const void *key, 3667# unsigned char ivec[16], 3668# u64 
*Xi); 3669# 3670$code.=<<___; 3671.global unroll8_eor3_aes_gcm_dec_192_kernel 3672.type unroll8_eor3_aes_gcm_dec_192_kernel,%function 3673.align 4 3674unroll8_eor3_aes_gcm_dec_192_kernel: 3675 AARCH64_VALID_CALL_TARGET 3676 cbz x1, .L192_dec_ret 3677 stp d8, d9, [sp, #-80]! 3678 mov $counter, x4 3679 mov $cc, x5 3680 stp d10, d11, [sp, #16] 3681 stp d12, d13, [sp, #32] 3682 stp d14, d15, [sp, #48] 3683 mov x5, #0xc200000000000000 3684 stp x5, xzr, [sp, #64] 3685 add $modulo_constant, sp, #64 3686 3687 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 3688 ld1 { $ctr0b}, [$counter] @ CTR block 0 3689 ld1 { $acc_lb}, [$current_tag] 3690 3691 mov $constant_temp, #0x100000000 @ set up counter increment 3692 movi $rctr_inc.16b, #0x0 3693 mov $rctr_inc.d[1], $constant_temp 3694 3695 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 3696 3697 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 3698 3699 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 3700 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 3701 3702 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 3703 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 3704 3705 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 3706 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 3707 3708 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 3709 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 3710 3711 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 3712 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 3713 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3714 3715 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 3716 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 3717 3718 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 3719 3720 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 3721 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 3722 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 3723 3724 aese $ctr0b, $rk0 \n aesmc $ctr0b, 
$ctr0b @ AES block 0 - round 0 3725 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 3726 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 3727 3728 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 3729 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 3730 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3731 3732 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 3733 3734 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 3735 3736 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 3737 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 3738 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 3739 3740 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 3741 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 3742 3743 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 3744 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 3745 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 3746 3747 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 3748 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 3749 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 3750 3751 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 3752 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 3753 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 3754 3755 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 3756 3757 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 3758 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 3759 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 3760 3761 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 3762 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 3763 3764 aese $ctr4b, $rk3 \n aesmc 
$ctr4b, $ctr4b @ AES block 4 - round 3 3765 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 3766 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 3767 3768 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 3769 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 3770 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 3771 3772 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 3773 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 3774 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 3775 3776 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 3777 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 3778 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 3779 3780 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 3781 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 3782 3783 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 3784 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 3785 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 3786 3787 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 3788 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 3789 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 3790 3791 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 3792 3793 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 3794 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 3795 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 3796 3797 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 3798 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 3799 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 3800 3801 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 3802 aese 
$ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 3803 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 3804 3805 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 3806 3807 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 3808 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 3809 3810 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 3811 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 3812 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 3813 3814 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 3815 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 3816 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 3817 3818 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 3819 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 3820 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 3821 3822 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 3823 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 3824 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 3825 3826 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 3827 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 3828 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 3829 3830 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 3831 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 3832 3833 ld1 { $acc_lb}, [$current_tag] 3834 ext $acc_lb, $acc_lb, $acc_lb, #8 3835 rev64 $acc_lb, $acc_lb 3836 3837 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 3838 3839 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 3840 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 3841 3842 aese $ctr1b, $rk9 \n aesmc 
$ctr1b, $ctr1b @ AES block 1 - round 9 3843 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 3844 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 3845 3846 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 3847 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 3848 3849 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 3850 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 3851 3852 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 3853 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 3854 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 3855 3856 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10 3857 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 3858 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 3859 3860 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 3861 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 3862 ldr $rk12q, [$cc, #192] @ load rk12 3863 3864 aese $ctr0b, $rk11 @ AES block 0 - round 11 3865 aese $ctr1b, $rk11 @ AES block 1 - round 11 3866 aese $ctr4b, $rk11 @ AES block 4 - round 11 3867 3868 aese $ctr6b, $rk11 @ AES block 6 - round 11 3869 aese $ctr5b, $rk11 @ AES block 5 - round 11 3870 aese $ctr7b, $rk11 @ AES block 7 - round 11 3871 3872 aese $ctr2b, $rk11 @ AES block 2 - round 11 3873 aese $ctr3b, $rk11 @ AES block 3 - round 11 3874 b.ge .L192_dec_tail @ handle tail 3875 3876 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 3877 3878 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 3879 3880 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 3881 3882 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 1 - result 3883 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 0 - result 3884 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 
0, 1 - store result 3885 3886 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 3887 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 3888 3889 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 3890 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 3891 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 3 - result 3892 3893 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 2 - result 3894 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 3895 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 3896 3897 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 3898 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 3899 3900 eor3 $ctr4b, $res4b, $ctr4b, $rk12 @ AES block 4 - result 3901 3902 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 3903 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 3904 3905 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 5 - result 3906 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 3907 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 3908 3909 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 6 - result 3910 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 7 - result 3911 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 3912 3913 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 3914 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 3915 b.ge .L192_dec_prepretail @ do prepretail 3916 3917.L192_dec_main_loop: @ main loop start 3918 rev64 $res1b, $res1b @ GHASH block 8k+1 3919 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 3920 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 3921 3922 rev64 $res0b, $res0b @ GHASH block 8k 3923 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 3924 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 3925 3926 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 3927 ext $h7.16b, $h7.16b, $h7.16b, #8 3928 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 3929 ext $h8.16b, $h8.16b, $h8.16b, #8 3930 
rev64 $res4b, $res4b @ GHASH block 8k+4 3931 rev64 $res3b, $res3b @ GHASH block 8k+3 3932 3933 eor $res0b, $res0b, $acc_lb @ PRE 1 3934 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 3935 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 3936 3937 rev64 $res5b, $res5b @ GHASH block 8k+5 3938 3939 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 3940 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 3941 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 3942 3943 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 3944 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 3945 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 3946 3947 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 3948 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 3949 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 3950 3951 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 3952 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 3953 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 3954 3955 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 3956 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 3957 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 3958 ext $h5.16b, $h5.16b, $h5.16b, #8 3959 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 3960 ext $h6.16b, $h6.16b, $h6.16b, #8 3961 3962 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 3963 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 3964 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 3965 3966 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 3967 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 3968 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 3969 3970 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 
3971 rev64 $res2b, $res2b @ GHASH block 8k+2 3972 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 3973 3974 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 3975 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 3976 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 3977 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 3978 3979 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 3980 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 3981 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 3982 3983 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 3984 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 3985 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 3986 3987 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 3988 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 3989 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 3990 3991 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 3992 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 3993 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 3994 3995 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 3996 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 3997 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 3998 3999 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4000 ext $h3.16b, $h3.16b, $h3.16b, #8 4001 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4002 ext $h4.16b, $h4.16b, $h4.16b, #8 4003 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 4004 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 4005 4006 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 4007 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4008 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 
4009 4010 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 4011 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 4012 4013 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 4014 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 4015 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4016 4017 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4018 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 4019 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 4020 4021 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4022 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 4023 4024 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 4025 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 4026 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 4027 4028 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 4029 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 4030 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 4031 4032 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 4033 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 4034 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 4035 4036 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 4037 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 4038 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 4039 4040 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 4041 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 4042 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 4043 4044 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4045 ext $h1.16b, $h1.16b, $h1.16b, #8 4046 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 4047 ext $h2.16b, $h2.16b, $h2.16b, #8 4048 
aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 4049 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 4050 4051 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 4052 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 4053 rev64 $res7b, $res7b @ GHASH block 8k+7 4054 4055 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 4056 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4057 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 4058 4059 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 4060 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4061 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 4062 4063 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 4064 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 4065 rev64 $res6b, $res6b @ GHASH block 8k+6 4066 4067 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4068 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4069 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 4070 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 4071 4072 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 4073 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4074 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4075 4076 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 4077 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 4078 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 4079 4080 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 4081 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 4082 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 4083 4084 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 4085 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES 
block 8k+14 - round 7 4086 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 4087 4088 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 4089 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 4090 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 4091 4092 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 4093 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4094 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 4095 4096 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 4097 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 4098 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 4099 4100 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4101 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 4102 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 4103 4104 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 4105 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 4106 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 4107 4108 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4109 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 4110 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 4111 4112 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 4113 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 4114 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 4115 4116 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 4117 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 4118 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 4119 4120 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 4121 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 4122 eor3 $acc_lb, 
$acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 4123 4124 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 4125 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 4126 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 4127 4128 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 4129 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 4130 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 4131 4132 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 4133 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4134 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 4135 4136 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 4137 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 4138 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 4139 4140 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4141 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 4142 4143 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 4144 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 4145 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext 4146 4147 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 4148 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4149 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 4150 4151 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 4152 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 4153 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4154 4155 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 4156 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 4157 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load 
ciphertext 4158 4159 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 4160 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 4161 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 4162 4163 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 4164 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 4165 ldr $rk12q, [$cc, #192] @ load rk12 4166 4167 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 4168 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 4169 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 4170 4171 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 4172 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4173 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 4174 4175 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 4176 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 4177 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 4178 4179 eor3 $ctr0b, $res0b, $ctr0b, $rk12 @ AES block 8k+8 - result 4180 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 4181 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 4182 4183 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 4184 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 4185 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 4186 4187 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 4188 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 4189 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4190 4191 eor3 $ctr1b, $res1b, $ctr1b, $rk12 @ AES block 8k+9 - result 4192 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 4193 eor3 $ctr3b, $res3b, $ctr3b, $rk12 @ AES block 8k+11 - result 4194 4195 eor3 $ctr2b, $res2b, $ctr2b, $rk12 @ AES block 8k+10 - result 4196 eor3 $ctr7b, $res7b, $ctr7b, $rk12 @ AES block 8k+15 - result 4197 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ 
AES block 8k+10, 8k+11 - store result 4198 4199 eor3 $ctr5b, $res5b, $ctr5b, $rk12 @ AES block 8k+13 - result 4200 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 4201 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 4202 4203 eor3 $ctr4b, $res4b, $ctr4b, $rk12 @ AES block 8k+12 - result 4204 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 4205 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 4206 4207 eor3 $ctr6b, $res6b, $ctr6b, $rk12 @ AES block 8k+14 - result 4208 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 4209 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 4210 4211 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 4212 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 4213 4214 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 4215 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 4216 b.lt .L192_dec_main_loop 4217 4218.L192_dec_prepretail: @ PREPRETAIL 4219 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 4220 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 4221 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 4222 4223 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 4224 ext $h7.16b, $h7.16b, $h7.16b, #8 4225 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 4226 ext $h8.16b, $h8.16b, $h8.16b, #8 4227 rev64 $res0b, $res0b @ GHASH block 8k 4228 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 4229 4230 rev64 $res3b, $res3b @ GHASH block 8k+3 4231 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 4232 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 4233 4234 eor $res0b, $res0b, $acc_lb @ PRE 1 4235 rev64 $res2b, $res2b @ GHASH block 8k+2 4236 rev64 $res1b, $res1b @ GHASH block 8k+1 4237 4238 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 4239 ext $h5.16b, $h5.16b, $h5.16b, #8 4240 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 4241 ext $h6.16b, $h6.16b, $h6.16b, #8 4242 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 4243 4244 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES 
block 8k+8 - round 0 4245 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 4246 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 4247 4248 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 4249 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 4250 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 4251 4252 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 4253 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 4254 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 4255 4256 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 4257 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 4258 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 4259 4260 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 4261 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 4262 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 4263 4264 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 4265 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 4266 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 4267 4268 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 4269 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 4270 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 4271 4272 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 4273 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 4274 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 4275 4276 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 4277 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 4278 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 4279 4280 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 4281 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 4282 aese 
$ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 4283 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 4284 4285 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 4286 rev64 $res5b, $res5b @ GHASH block 8k+5 4287 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 4288 4289 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 4290 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 4291 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 4292 4293 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4294 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 4295 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 4296 4297 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 4298 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 4299 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 4300 4301 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 4302 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 4303 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 4304 4305 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 4306 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4307 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 4308 4309 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 4310 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 4311 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 4312 4313 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 4314 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4315 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 4316 4317 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4318 ext $h3.16b, $h3.16b, $h3.16b, #8 4319 ldr $h4q, [$current_tag, #112] @ load h4l | 
h4h 4320 ext $h4.16b, $h4.16b, $h4.16b, #8 4321 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 4322 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 4323 4324 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4325 ext $h1.16b, $h1.16b, $h1.16b, #8 4326 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 4327 ext $h2.16b, $h2.16b, $h2.16b, #8 4328 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 4329 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 4330 4331 rev64 $res7b, $res7b @ GHASH block 8k+7 4332 4333 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 4334 rev64 $res4b, $res4b @ GHASH block 8k+4 4335 4336 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 4337 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 4338 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 4339 4340 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 4341 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 4342 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 4343 4344 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 4345 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 4346 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 4347 4348 rev64 $res6b, $res6b @ GHASH block 8k+6 4349 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4350 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4351 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4352 4353 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 4354 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 4355 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 4356 4357 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 4358 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 4359 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES 
block 8k+13 - round 5 4360 4361 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 4362 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 4363 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 4364 4365 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 4366 4367 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 4368 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 4369 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 4370 4371 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 4372 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4373 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 4374 4375 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 4376 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 4377 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4378 4379 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 4380 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 4381 4382 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4383 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 4384 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 4385 4386 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 4387 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 4388 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 4389 4390 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 4391 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 4392 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 4393 4394 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 4395 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 4396 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 4397 4398 aese $ctr3b, $rk6 \n aesmc $ctr3b, 
$ctr3b @ AES block 8k+11 - round 6 4399 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 4400 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 4401 4402 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 4403 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 4404 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 4405 4406 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 4407 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 4408 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 4409 4410 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 4411 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 4412 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 4413 4414 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 4415 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 4416 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 4417 4418 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 4419 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 4420 4421 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4422 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4423 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 4424 4425 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 4426 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 4427 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 4428 4429 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 4430 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4431 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 4432 4433 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 4434 aese 
$ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 4435 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 4436 4437 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 4438 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 4439 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 4440 4441 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 4442 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 4443 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 4444 4445 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 4446 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 4447 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 4448 4449 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4450 ldr $rk12q, [$cc, #192] @ load rk12 4451 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4452 4453 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 4454 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 4455 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 4456 4457 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 4458 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 4459 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 4460 4461 aese $ctr0b, $rk11 @ AES block 8k+8 - round 11 4462 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 4463 aese $ctr5b, $rk11 @ AES block 8k+13 - round 11 4464 4465 aese $ctr2b, $rk11 @ AES block 8k+10 - round 11 4466 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 4467 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 4468 4469 aese $ctr6b, $rk11 @ AES block 8k+14 - round 11 4470 aese $ctr4b, $rk11 @ AES block 8k+12 - round 11 4471 add 
$rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 4472 4473 aese $ctr3b, $rk11 @ AES block 8k+11 - round 11 4474 aese $ctr1b, $rk11 @ AES block 8k+9 - round 11 4475 aese $ctr7b, $rk11 @ AES block 8k+15 - round 11 4476 4477.L192_dec_tail: @ TAIL 4478 4479 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of bytes left to process 4480 4481 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 4482 ext $h5.16b, $h5.16b, $h5.16b, #8 4483 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 4484 4485 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 4486 ext $h8.16b, $h8.16b, $h8.16b, #8 4487 4488 mov $t1.16b, $rk12 4489 4490 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 4491 ext $h6.16b, $h6.16b, $h6.16b, #8 4492 ext $h7.16b, $h7.16b, $h7.16b, #8 4493 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 4494 4495 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 4496 cmp $main_end_input_ptr, #112 4497 b.gt .L192_dec_blocks_more_than_7 4498 4499 mov $ctr7b, $ctr6b 4500 movi $acc_h.8b, #0 4501 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4502 4503 mov $ctr6b, $ctr5b 4504 mov $ctr5b, $ctr4b 4505 mov $ctr4b, $ctr3b 4506 4507 cmp $main_end_input_ptr, #96 4508 movi $acc_l.8b, #0 4509 mov $ctr3b, $ctr2b 4510 4511 mov $ctr2b, $ctr1b 4512 movi $acc_m.8b, #0 4513 b.gt .L192_dec_blocks_more_than_6 4514 4515 mov $ctr7b, $ctr6b 4516 mov $ctr6b, $ctr5b 4517 mov $ctr5b, $ctr4b 4518 4519 mov $ctr4b, $ctr3b 4520 mov $ctr3b, $ctr1b 4521 4522 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4523 cmp $main_end_input_ptr, #80 4524 b.gt .L192_dec_blocks_more_than_5 4525 4526 mov $ctr7b, $ctr6b 4527 mov $ctr6b, $ctr5b 4528 4529 mov $ctr5b, $ctr4b 4530 mov $ctr4b, $ctr1b 4531 cmp $main_end_input_ptr, #64 4532 4533 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4534 b.gt .L192_dec_blocks_more_than_4 4535 4536 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4537 mov $ctr7b, $ctr6b 4538 mov $ctr6b, $ctr5b 4539 
4540 mov $ctr5b, $ctr1b 4541 cmp $main_end_input_ptr, #48 4542 b.gt .L192_dec_blocks_more_than_3 4543 4544 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4545 mov $ctr7b, $ctr6b 4546 cmp $main_end_input_ptr, #32 4547 4548 mov $ctr6b, $ctr1b 4549 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4550 b.gt .L192_dec_blocks_more_than_2 4551 4552 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4553 4554 mov $ctr7b, $ctr1b 4555 cmp $main_end_input_ptr, #16 4556 b.gt .L192_dec_blocks_more_than_1 4557 4558 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 4559 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4560 b .L192_dec_blocks_less_than_1 4561.L192_dec_blocks_more_than_7: @ blocks left > 7 4562 rev64 $res0b, $res1b @ GHASH final-7 block 4563 4564 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 4565 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4566 4567 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 4568 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 4569 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 4570 4571 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 4572 4573 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 4574 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 4575 4576 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 4577 4578 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 4579 movi $t0.8b, #0 @ supress further partial tag feed in 4580.L192_dec_blocks_more_than_6: @ blocks left > 6 4581 4582 rev64 $res0b, $res1b @ GHASH final-6 block 4583 4584 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4585 4586 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 4587 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 4588 4589 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 4590 movi $t0.8b, #0 @ supress further partial tag feed in 4591 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 4592 4593 
st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 4594 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 4595 4596 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 4597 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 4598 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 4599 4600 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 4601 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 4602.L192_dec_blocks_more_than_5: @ blocks left > 5 4603 4604 rev64 $res0b, $res1b @ GHASH final-5 block 4605 4606 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4607 4608 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 4609 4610 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 4611 4612 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 4613 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 4614 4615 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 4616 4617 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 4618 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 4619 4620 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 4621 4622 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 4623 movi $t0.8b, #0 @ supress further partial tag feed in 4624 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 4625 4626 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 4627 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 4628.L192_dec_blocks_more_than_4: @ blocks left > 4 4629 4630 rev64 $res0b, $res1b @ GHASH final-4 block 4631 4632 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4633 movi $t0.8b, #0 @ supress further partial tag feed in 4634 4635 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 4636 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 4637 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 4638 4639 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH 
final-4 block - mid 4640 4641 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 4642 4643 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 4644 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 4645 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 4646 4647 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 4648 4649 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 4650 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 4651.L192_dec_blocks_more_than_3: @ blocks left > 3 4652 4653 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 4654 ext $h4.16b, $h4.16b, $h4.16b, #8 4655 rev64 $res0b, $res1b @ GHASH final-3 block 4656 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 4657 4658 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4659 4660 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 4661 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 4662 4663 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 4664 movi $t0.8b, #0 @ supress further partial tag feed in 4665 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 4666 4667 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 4668 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 4669 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 4670 4671 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 4672 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 4673 4674 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 4675 4676 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 4677 4678 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 4679.L192_dec_blocks_more_than_2: @ blocks left > 2 4680 4681 rev64 $res0b, $res1b @ GHASH final-2 block 4682 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 4683 ext $h3.16b, $h3.16b, $h3.16b, #8 4684 4685 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4686 4687 ins $rk4v.d[0], $res0.d[1] @ GHASH 
final-2 block - mid 4688 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 4689 4690 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 4691 4692 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 4693 4694 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 4695 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 4696 4697 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 4698 movi $t0.8b, #0 @ supress further partial tag feed in 4699 4700 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 4701 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 4702 4703 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 4704 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 4705.L192_dec_blocks_more_than_1: @ blocks left > 1 4706 4707 rev64 $res0b, $res1b @ GHASH final-1 block 4708 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 4709 ldr $h2q, [$current_tag, #64] @ load h1l | h1h 4710 ext $h2.16b, $h2.16b, $h2.16b, #8 4711 4712 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4713 movi $t0.8b, #0 @ supress further partial tag feed in 4714 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 4715 4716 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 4717 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 4718 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 4719 4720 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 4721 4722 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 4723 4724 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 4725 4726 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 4727 4728 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 4729 4730 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 4731 4732 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 4733 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 
4734.L192_dec_blocks_less_than_1: @ blocks left <= 1 4735 4736 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 4737 and $bit_length, $bit_length, #127 @ bit_length %= 128 4738 4739 sub $bit_length, $bit_length, #128 @ bit_length -= 128 4740 str $rtmp_ctrq, [$counter] @ store the updated counter 4741 4742 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 4743 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 4744 4745 and $bit_length, $bit_length, #127 @ bit_length %= 128 4746 4747 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 4748 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 4749 cmp $bit_length, #64 4750 4751 csel $temp2_x, $temp1_x, $temp0_x, lt 4752 csel $temp3_x, $temp0_x, xzr, lt 4753 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 4754 ext $h1.16b, $h1.16b, $h1.16b, #8 4755 4756 mov $ctr0.d[1], $temp3_x 4757 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 4758 4759 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 4760 4761 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 4762 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 4763 4764 rev64 $res0b, $res1b @ GHASH final block 4765 4766 st1 { $res4b}, [$output_ptr] @ store all 16B 4767 4768 eor $res0b, $res0b, $t0.16b @ feed in partial tag 4769 4770 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 4771 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 4772 4773 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 4774 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 4775 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 4776 4777 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 4778 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 4779 4780 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 4781 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 4782 ldr $mod_constantd, 
[$modulo_constant] @ MODULO - load modulo constant 4783 4784 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 4785 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 4786 4787 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up 4788 4789 eor3 $acc_mb, $acc_mb, $acc_hb, $t11.16b @ MODULO - fold into mid 4790 4791 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 4792 ext $acc_mb, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 4793 4794 eor3 $acc_lb, $acc_lb, $acc_mb, $acc_hb @ MODULO - fold into low 4795 ext $acc_lb, $acc_lb, $acc_lb, #8 4796 rev64 $acc_lb, $acc_lb 4797 st1 { $acc_l.16b }, [$current_tag] 4798 4799 ldp d10, d11, [sp, #16] 4800 ldp d12, d13, [sp, #32] 4801 ldp d14, d15, [sp, #48] 4802 ldp d8, d9, [sp], #80 4803 ret 4804 4805.L192_dec_ret: 4806 mov w0, #0x0 4807 ret 4808.size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel 4809___ 4810} 4811 4812{ 4813 4814my ($end_input_ptr,$main_end_input_ptr,$temp0_x,$temp1_x)=map("x$_",(4..7)); 4815my ($temp2_x,$temp3_x)=map("x$_",(13..14)); 4816my ($ctr0b,$ctr1b,$ctr2b,$ctr3b,$ctr4b,$ctr5b,$ctr6b,$ctr7b,$res0b,$res1b,$res2b,$res3b,$res4b,$res5b,$res6b,$res7b)=map("v$_.16b",(0..15)); 4817my ($ctr0,$ctr1,$ctr2,$ctr3,$ctr4,$ctr5,$ctr6,$ctr7,$res0,$res1,$res2,$res3,$res4,$res5,$res6,$res7)=map("v$_",(0..15)); 4818my ($ctr0d,$ctr1d,$ctr2d,$ctr3d,$ctr4d,$ctr5d,$ctr6d,$ctr7d)=map("d$_",(0..7)); 4819my ($ctr0q,$ctr1q,$ctr2q,$ctr3q,$ctr4q,$ctr5q,$ctr6q,$ctr7q)=map("q$_",(0..7)); 4820my ($res0q,$res1q,$res2q,$res3q,$res4q,$res5q,$res6q,$res7q)=map("q$_",(8..15)); 4821 4822my ($ctr_t0,$ctr_t1,$ctr_t2,$ctr_t3,$ctr_t4,$ctr_t5,$ctr_t6,$ctr_t7)=map("v$_",(8..15)); 4823my ($ctr_t0b,$ctr_t1b,$ctr_t2b,$ctr_t3b,$ctr_t4b,$ctr_t5b,$ctr_t6b,$ctr_t7b)=map("v$_.16b",(8..15)); 4824my ($ctr_t0q,$ctr_t1q,$ctr_t2q,$ctr_t3q,$ctr_t4q,$ctr_t5q,$ctr_t6q,$ctr_t7q)=map("q$_",(8..15)); 4825 4826my 
($acc_hb,$acc_mb,$acc_lb)=map("v$_.16b",(17..19)); 4827my ($acc_h,$acc_m,$acc_l)=map("v$_",(17..19)); 4828 4829my ($h1,$h12k,$h2,$h3,$h34k,$h4)=map("v$_",(20..25)); 4830my ($h5,$h56k,$h6,$h7,$h78k,$h8)=map("v$_",(20..25)); 4831my ($h1q,$h12kq,$h2q,$h3q,$h34kq,$h4q)=map("q$_",(20..25)); 4832my ($h5q,$h56kq,$h6q,$h7q,$h78kq,$h8q)=map("q$_",(20..25)); 4833 4834my $t0="v16"; 4835my $t0d="d16"; 4836 4837my $t1="v29"; 4838my $t2=$res1; 4839my $t3=$t1; 4840 4841my $t4=$res0; 4842my $t5=$res2; 4843my $t6=$t0; 4844 4845my $t7=$res3; 4846my $t8=$res4; 4847my $t9=$res5; 4848 4849my $t10=$res6; 4850my $t11="v21"; 4851my $t12=$t1; 4852 4853my $rtmp_ctr="v30"; 4854my $rtmp_ctrq="q30"; 4855my $rctr_inc="v31"; 4856my $rctr_incd="d31"; 4857 4858my $mod_constantd=$t0d; 4859my $mod_constant=$t0; 4860 4861my ($rk0,$rk1,$rk2)=map("v$_.16b",(26..28)); 4862my ($rk3,$rk4,$rk5)=map("v$_.16b",(26..28)); 4863my ($rk6,$rk7,$rk8)=map("v$_.16b",(26..28)); 4864my ($rk9,$rk10,$rk11)=map("v$_.16b",(26..28)); 4865my ($rk12,$rk13,$rk14)=map("v$_.16b",(26..28)); 4866my ($rk0q,$rk1q,$rk2q)=map("q$_",(26..28)); 4867my ($rk3q,$rk4q,$rk5q)=map("q$_",(26..28)); 4868my ($rk6q,$rk7q,$rk8q)=map("q$_",(26..28)); 4869my ($rk9q,$rk10q,$rk11q)=map("q$_",(26..28)); 4870my ($rk12q,$rk13q,$rk14q)=map("q$_",(26..28)); 4871my $rk2q1="v28.1q"; 4872my $rk3q1="v26.1q"; 4873my $rk4v="v27"; 4874######################################################################################### 4875# size_t unroll8_eor3_aes_gcm_enc_256_kernel(const unsigned char *in, 4876# size_t len, 4877# unsigned char *out, 4878# const void *key, 4879# unsigned char ivec[16], 4880# u64 *Xi); 4881# 4882$code.=<<___; 4883.global unroll8_eor3_aes_gcm_enc_256_kernel 4884.type unroll8_eor3_aes_gcm_enc_256_kernel,%function 4885.align 4 4886unroll8_eor3_aes_gcm_enc_256_kernel: 4887 AARCH64_VALID_CALL_TARGET 4888 cbz x1, .L256_enc_ret 4889 stp d8, d9, [sp, #-80]! 
4890 mov $counter, x4 4891 mov $cc, x5 4892 stp d10, d11, [sp, #16] 4893 stp d12, d13, [sp, #32] 4894 stp d14, d15, [sp, #48] 4895 mov x5, #0xc200000000000000 4896 stp x5, xzr, [sp, #64] 4897 add $modulo_constant, sp, #64 4898 4899 ld1 { $ctr0b}, [$counter] @ CTR block 0 4900 4901 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 4902 4903 mov $constant_temp, #0x100000000 @ set up counter increment 4904 movi $rctr_inc.16b, #0x0 4905 mov $rctr_inc.d[1], $constant_temp 4906 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 4907 4908 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 4909 4910 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 4911 4912 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 4913 4914 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 4915 4916 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 4917 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 4918 4919 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 4920 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 4921 4922 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 4923 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 4924 4925 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 4926 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 4927 4928 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 4929 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 4930 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 4931 4932 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 4933 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 4934 4935 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 4936 4937 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 0 4938 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 4939 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 4940 4941 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b 
@ AES block 0 - round 0 4942 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 4943 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 4944 4945 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 4946 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 4947 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 4948 4949 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 4950 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 4951 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 4952 4953 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 4954 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 4955 4956 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 4957 4958 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 4959 4960 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 4961 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 4962 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 4963 4964 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 4965 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 4966 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 4967 4968 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 4969 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 4970 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 4971 4972 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 4973 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 4974 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 4975 4976 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 4977 4978 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 4979 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 4980 aese $ctr7b, $rk3 \n aesmc $ctr7b, 
$ctr7b @ AES block 7 - round 3 4981 4982 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 4983 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 4984 4985 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 4986 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 4987 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 4988 4989 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 4990 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 4991 4992 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 4993 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 4 4994 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 4995 4996 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 4997 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 4998 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 4999 5000 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 5001 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 5002 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 5003 5004 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 5005 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 5006 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 5007 5008 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 5009 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 5010 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 5011 5012 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 5013 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 5014 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 5015 5016 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 5017 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 5018 ldp $rk8q, $rk9q, 
[$cc, #128] @ load rk8, rk9 5019 5020 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 5021 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 5022 5023 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 5024 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 5025 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 5026 5027 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 5028 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 7 5029 5030 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 5031 5032 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 5033 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 5034 5035 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 5036 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 5037 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 5038 5039 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 5040 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 5041 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 5042 5043 ld1 { $acc_lb}, [$current_tag] 5044 ext $acc_lb, $acc_lb, $acc_lb, #8 5045 rev64 $acc_lb, $acc_lb 5046 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5047 5048 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 5049 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 5050 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 5051 5052 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 5053 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 5054 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 5055 5056 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 9 5057 5058 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 5059 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b 
@ AES block 4 - round 10 5060 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 5061 5062 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 5063 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 5064 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 5065 5066 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 5067 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 5068 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 5069 5070 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11 5071 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5072 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11 5073 5074 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 5075 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11 5076 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 5077 5078 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 5079 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 5080 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11 5081 5082 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 5083 ldr $rk14q, [$cc, #224] @ load rk14 5084 5085 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12 5086 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 5087 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 5088 5089 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12 5090 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12 5091 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 5092 5093 aese $ctr2b, $rk13 @ AES block 2 - round 13 5094 aese $ctr1b, $rk13 @ AES block 1 - round 13 5095 aese $ctr4b, $rk13 @ AES block 4 - round 13 5096 5097 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 12 5098 aese 
$ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12 5099 5100 aese $ctr0b, $rk13 @ AES block 0 - round 13 5101 aese $ctr5b, $rk13 @ AES block 5 - round 13 5102 5103 aese $ctr6b, $rk13 @ AES block 6 - round 13 5104 aese $ctr7b, $rk13 @ AES block 7 - round 13 5105 aese $ctr3b, $rk13 @ AES block 3 - round 13 5106 5107 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 5108 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 5109 b.ge .L256_enc_tail @ handle tail 5110 5111 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 0, 1 - load plaintext 5112 5113 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 2, 3 - load plaintext 5114 5115 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 0 - result 5116 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 5117 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 5118 5119 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 1 - result 5120 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 3 - result 5121 5122 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 5123 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 5124 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 5125 5126 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 5127 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 2 - result 5128 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 5129 5130 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 5131 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 5132 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 0, 1 - store result 5133 5134 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 2, 3 - store result 5135 5136 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 5137 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 11 5138 5139 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result 5140 5141 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result 5142 eor3 $res6b, $ctr_t6b, $ctr6b, 
$rk14 @ AES block 6 - result 5143 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result 5144 5145 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 5146 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 5147 5148 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 5149 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 5150 b.ge .L256_enc_prepretail @ do prepretail 5151 5152.L256_enc_main_loop: @ main loop start 5153 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 5154 5155 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 5156 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 5157 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 5158 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 5159 5160 rev64 $res3b, $res3b @ GHASH block 8k+3 5161 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 5162 ext $h5.16b, $h5.16b, $h5.16b, #8 5163 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 5164 ext $h6.16b, $h6.16b, $h6.16b, #8 5165 rev64 $res1b, $res1b @ GHASH block 8k+1 5166 5167 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 5168 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 5169 rev64 $res0b, $res0b @ GHASH block 8k 5170 5171 rev64 $res4b, $res4b @ GHASH block 8k+4 5172 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5173 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 5174 ext $h7.16b, $h7.16b, $h7.16b, #8 5175 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 5176 ext $h8.16b, $h8.16b, $h8.16b, #8 5177 5178 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 5179 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 5180 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 5181 5182 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 5183 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 5184 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 5185 5186 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 
5187 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 5188 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 5189 5190 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 5191 eor $res0b, $res0b, $acc_lb @ PRE 1 5192 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 5193 5194 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 5195 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 5196 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 5197 5198 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 5199 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 5200 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 5201 5202 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 5203 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 5204 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 5205 5206 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5207 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5208 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 5209 5210 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 5211 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 5212 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 5213 5214 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 5215 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 5216 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 5217 5218 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 5219 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 5220 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 5221 5222 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 5223 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES 
block 8k+15 - round 2 5224 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 5225 5226 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 5227 rev64 $res6b, $res6b @ GHASH block 8k+6 5228 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 5229 5230 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 5231 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 5232 rev64 $res2b, $res2b @ GHASH block 8k+2 5233 5234 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 5235 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 5236 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 5237 5238 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 5239 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 5240 rev64 $res5b, $res5b @ GHASH block 8k+5 5241 5242 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 5243 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 5244 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5245 ext $h3.16b, $h3.16b, $h3.16b, #8 5246 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5247 ext $h4.16b, $h4.16b, $h4.16b, #8 5248 5249 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5250 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 5251 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 5252 5253 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 5254 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 5255 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 5256 5257 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 5258 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 5259 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 5260 5261 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5262 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 5263 aese 
$ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 5264 5265 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5266 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 5267 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 5268 5269 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 5270 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 5271 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 5272 5273 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5274 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 5275 rev64 $res7b, $res7b @ GHASH block 8k+7 5276 5277 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 5278 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 5279 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 5280 5281 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 5282 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 5283 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 5284 5285 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 5286 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 5287 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 5288 5289 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 5290 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 5291 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 5292 5293 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 5294 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 5295 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 5296 5297 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 5298 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 5299 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - 
round 6 5300 5301 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 5302 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 5303 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 5304 5305 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 5306 ext $h1.16b, $h1.16b, $h1.16b, #8 5307 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 5308 ext $h2.16b, $h2.16b, $h2.16b, #8 5309 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 5310 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5311 5312 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5313 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5314 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 5315 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 5316 5317 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 5318 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 5319 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 5320 5321 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5322 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 5323 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 5324 5325 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 5326 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 5327 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 5328 5329 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 5330 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5331 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5332 5333 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 5334 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 5335 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 5336 5337 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 5338 pmull $h34k.1q, $res4.1d, 
$h34k.1d @ GHASH block 8k+5 - mid 5339 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 5340 5341 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 5342 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 5343 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 5344 5345 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 5346 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5347 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 5348 5349 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5350 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 5351 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 5352 5353 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5354 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 5355 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 5356 5357 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5358 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 5359 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 5360 5361 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 5362 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 5363 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 5364 5365 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 5366 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 5367 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 5368 5369 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 5370 5371 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5372 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 5373 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 5374 5375 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES 
block 8k+12 - round 10 5376 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 5377 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 5378 5379 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 5380 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 5381 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 5382 5383 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 5384 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 5385 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 5386 5387 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 5388 5389 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5390 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 5391 5392 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5393 ldp $ctr_t0q, $ctr_t1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load plaintext 5394 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 5395 5396 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 5397 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 5398 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 5399 5400 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 5401 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 5402 5403 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5404 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 5405 5406 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 5407 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 5408 5409 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 5410 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 5411 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 5412 5413 
add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 5414 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 5415 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 5416 5417 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 5418 ldr $rk14q, [$cc, #224] @ load rk14 5419 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 5420 5421 ldp $ctr_t2q, $ctr_t3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load plaintext 5422 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 5423 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 5424 5425 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 5426 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 5427 ldp $ctr_t4q, $ctr_t5q, [$input_ptr], #32 @ AES block 4, 5 - load plaintext 5428 5429 ldp $ctr_t6q, $ctr_t7q, [$input_ptr], #32 @ AES block 6, 7 - load plaintext 5430 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 5431 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 5432 5433 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 5434 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 5435 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 5436 5437 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 5438 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 5439 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 5440 5441 eor3 $res2b, $ctr_t2b, $ctr2b, $rk14 @ AES block 8k+10 - result 5442 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 5443 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 5444 5445 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 5446 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 5447 eor3 $res5b, $ctr_t5b, $ctr5b, $rk14 @ AES block 5 - result 5448 5449 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5450 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5451 aese $ctr1b, $rk13 @ AES block 
8k+9 - round 13 5452 5453 eor3 $res4b, $ctr_t4b, $ctr4b, $rk14 @ AES block 4 - result 5454 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 5455 eor3 $res3b, $ctr_t3b, $ctr3b, $rk14 @ AES block 8k+11 - result 5456 5457 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 5458 eor3 $res1b, $ctr_t1b, $ctr1b, $rk14 @ AES block 8k+9 - result 5459 eor3 $res0b, $ctr_t0b, $ctr0b, $rk14 @ AES block 8k+8 - result 5460 5461 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 5462 stp $res0q, $res1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 5463 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 5464 5465 eor3 $res7b, $ctr_t7b, $ctr7b, $rk14 @ AES block 7 - result 5466 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 5467 stp $res2q, $res3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 5468 5469 eor3 $res6b, $ctr_t6b, $ctr6b, $rk14 @ AES block 6 - result 5470 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 5471 stp $res4q, $res5q, [$output_ptr], #32 @ AES block 4, 5 - store result 5472 5473 stp $res6q, $res7q, [$output_ptr], #32 @ AES block 6, 7 - store result 5474 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 5475 b.lt .L256_enc_main_loop 5476 5477.L256_enc_prepretail: @ PREPRETAIL 5478 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 5479 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 5480 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 5481 5482 rev64 $res2b, $res2b @ GHASH block 8k+2 5483 5484 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 5485 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 5486 5487 rev64 $res5b, $res5b @ GHASH block 8k+5 5488 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 5489 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 5490 5491 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 5492 5493 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 5494 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 5495 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 
8k+9 - round 0 5496 5497 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 5498 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 5499 5500 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 5501 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 5502 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 5503 5504 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 5505 rev64 $res0b, $res0b @ GHASH block 8k 5506 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 5507 5508 rev64 $res1b, $res1b @ GHASH block 8k+1 5509 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 5510 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 5511 5512 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 5513 ext $h7.16b, $h7.16b, $h7.16b, #8 5514 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 5515 ext $h8.16b, $h8.16b, $h8.16b, #8 5516 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 5517 5518 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 5519 ext $h5.16b, $h5.16b, $h5.16b, #8 5520 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 5521 ext $h6.16b, $h6.16b, $h6.16b, #8 5522 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 5523 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 5524 5525 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 5526 eor $res0b, $res0b, $acc_lb @ PRE 1 5527 5528 rev64 $res3b, $res3b @ GHASH block 8k+3 5529 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 5530 5531 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 5532 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 5533 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 5534 5535 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 5536 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 5537 aese $ctr6b, $rk2 \n 
aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 5538 5539 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 5540 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 5541 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 5542 5543 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 5544 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5545 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 5546 5547 rev64 $res6b, $res6b @ GHASH block 8k+6 5548 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 5549 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 5550 5551 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 5552 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 5553 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 5554 5555 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 5556 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 5557 5558 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 5559 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 5560 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 5561 5562 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 5563 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 5564 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 5565 5566 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 5567 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 5568 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 5569 5570 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 5571 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 5572 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 5573 5574 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 5575 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES 
block 8k+10 - round 4 5576 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 5577 5578 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 5579 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 5580 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 5581 5582 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 5583 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5584 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 5585 5586 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 5587 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 5588 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 5589 5590 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 5591 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 5592 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5593 5594 rev64 $res4b, $res4b @ GHASH block 8k+4 5595 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 5596 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 5597 5598 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 5599 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 5600 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 5601 5602 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5603 ext $h3.16b, $h3.16b, $h3.16b, #8 5604 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5605 ext $h4.16b, $h4.16b, $h4.16b, #8 5606 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 5607 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 5608 5609 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 5610 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 5611 5612 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 5613 rev64 $res7b, $res7b @ GHASH block 8k+7 5614 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 
8k+5 - mid 5615 5616 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 5617 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 5618 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 5619 5620 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 5621 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 5622 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 5623 5624 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5625 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5626 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 5627 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 5628 5629 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 5630 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 5631 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 5632 5633 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 5634 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 5635 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 5636 ext $h1.16b, $h1.16b, $h1.16b, #8 5637 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 5638 ext $h2.16b, $h2.16b, $h2.16b, #8 5639 5640 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 5641 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 5642 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 5643 5644 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 5645 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 5646 5647 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 5648 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 5649 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 5650 5651 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 5652 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 5653 eor 
$res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5654 5655 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 5656 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 5657 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 5658 5659 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5660 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 5661 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 5662 5663 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 5664 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 5665 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 5666 5667 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 5668 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 5669 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 5670 5671 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 5672 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5673 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 5674 5675 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 5676 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 5677 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 5678 5679 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 5680 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 5681 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 5682 5683 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 5684 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 5685 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 5686 5687 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 5688 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 5689 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 
9 5690 5691 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 5692 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 5693 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 5694 5695 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 5696 5697 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 5698 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 5699 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 5700 5701 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 5702 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 5703 5704 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 5705 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 5706 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 5707 5708 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 5709 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 5710 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 5711 5712 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 5713 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 5714 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 5715 5716 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 5717 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 5718 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 5719 5720 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 5721 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 5722 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 5723 5724 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 5725 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - 
round 11 5726 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 5727 5728 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 5729 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 5730 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 5731 5732 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 5733 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 5734 ldr $rk14q, [$cc, #224] @ load rk14 5735 5736 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 5737 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 5738 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 5739 5740 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 5741 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 5742 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 5743 5744 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 5745 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 5746 5747 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 5748 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 5749 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 5750 5751 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 5752 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 5753 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 5754 5755 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 5756 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 5757 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 5758 5759 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 5760 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 5761.L256_enc_tail: @ TAIL 5762 5763 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8l | h8h 5764 ext $h8.16b, $h8.16b, $h8.16b, #8 5765 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ 
main_end_input_ptr is number of bytes left to process 5766 5767 ldr $ctr_t0q, [$input_ptr], #16 @ AES block 8k+8 - load plaintext 5768 5769 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 5770 ext $h5.16b, $h5.16b, $h5.16b, #8 5771 5772 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 5773 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 5774 ext $h6.16b, $h6.16b, $h6.16b, #8 5775 ext $h7.16b, $h7.16b, $h7.16b, #8 5776 mov $t1.16b, $rk14 5777 5778 cmp $main_end_input_ptr, #112 5779 eor3 $res1b, $ctr_t0b, $ctr0b, $t1.16b @ AES block 8k+8 - result 5780 b.gt .L256_enc_blocks_more_than_7 5781 5782 movi $acc_l.8b, #0 5783 mov $ctr7b, $ctr6b 5784 movi $acc_h.8b, #0 5785 5786 mov $ctr6b, $ctr5b 5787 mov $ctr5b, $ctr4b 5788 mov $ctr4b, $ctr3b 5789 5790 mov $ctr3b, $ctr2b 5791 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5792 mov $ctr2b, $ctr1b 5793 5794 movi $acc_m.8b, #0 5795 cmp $main_end_input_ptr, #96 5796 b.gt .L256_enc_blocks_more_than_6 5797 5798 mov $ctr7b, $ctr6b 5799 mov $ctr6b, $ctr5b 5800 cmp $main_end_input_ptr, #80 5801 5802 mov $ctr5b, $ctr4b 5803 mov $ctr4b, $ctr3b 5804 mov $ctr3b, $ctr1b 5805 5806 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5807 b.gt .L256_enc_blocks_more_than_5 5808 5809 mov $ctr7b, $ctr6b 5810 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5811 5812 mov $ctr6b, $ctr5b 5813 mov $ctr5b, $ctr4b 5814 5815 cmp $main_end_input_ptr, #64 5816 mov $ctr4b, $ctr1b 5817 b.gt .L256_enc_blocks_more_than_4 5818 5819 cmp $main_end_input_ptr, #48 5820 mov $ctr7b, $ctr6b 5821 mov $ctr6b, $ctr5b 5822 5823 mov $ctr5b, $ctr1b 5824 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5825 b.gt .L256_enc_blocks_more_than_3 5826 5827 cmp $main_end_input_ptr, #32 5828 mov $ctr7b, $ctr6b 5829 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5830 5831 mov $ctr6b, $ctr1b 5832 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5833 b.gt .L256_enc_blocks_more_than_2 5834 5835 mov $ctr7b, $ctr1b 5836 5837 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5838 
cmp $main_end_input_ptr, #16 5839 b.gt .L256_enc_blocks_more_than_1 5840 5841 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 5842 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 5843 b .L256_enc_blocks_less_than_1 5844.L256_enc_blocks_more_than_7: @ blocks left > 7 5845 st1 { $res1b}, [$output_ptr], #16 @ AES final-7 block - store result 5846 5847 rev64 $res0b, $res1b @ GHASH final-7 block 5848 5849 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5850 5851 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-6 block - load plaintext 5852 5853 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 5854 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 5855 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 5856 5857 movi $t0.8b, #0 @ supress further partial tag feed in 5858 5859 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 5860 eor3 $res1b, $ctr_t1b, $ctr1b, $t1.16b @ AES final-6 block - result 5861 5862 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 5863 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 5864.L256_enc_blocks_more_than_6: @ blocks left > 6 5865 5866 st1 { $res1b}, [$output_ptr], #16 @ AES final-6 block - store result 5867 5868 rev64 $res0b, $res1b @ GHASH final-6 block 5869 5870 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5871 5872 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 5873 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 5874 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 5875 5876 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-5 block - load plaintext 5877 5878 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 5879 5880 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 5881 5882 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 5883 eor3 $res1b, $ctr_t1b, $ctr2b, $t1.16b @ AES final-5 block - result 5884 5885 movi $t0.8b, #0 @ supress further partial tag feed in 5886 5887 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 
block - mid 5888 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-6 block - high 5889.L256_enc_blocks_more_than_5: @ blocks left > 5 5890 5891 st1 { $res1b}, [$output_ptr], #16 @ AES final-5 block - store result 5892 5893 rev64 $res0b, $res1b @ GHASH final-5 block 5894 5895 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5896 5897 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 5898 5899 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 5900 5901 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 5902 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 5903 5904 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 5905 5906 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-4 block - load plaintext 5907 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 5908 5909 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 5910 movi $t0.8b, #0 @ supress further partial tag feed in 5911 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 5912 5913 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 5914 eor3 $res1b, $ctr_t1b, $ctr3b, $t1.16b @ AES final-4 block - result 5915.L256_enc_blocks_more_than_4: @ blocks left > 4 5916 5917 st1 { $res1b}, [$output_ptr], #16 @ AES final-4 block - store result 5918 5919 rev64 $res0b, $res1b @ GHASH final-4 block 5920 5921 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-3 block - load plaintext 5922 5923 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5924 5925 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 5926 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 5927 5928 eor3 $res1b, $ctr_t1b, $ctr4b, $t1.16b @ AES final-3 block - result 5929 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 5930 5931 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 5932 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 5933 5934 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 5935 5936 movi $t0.8b, #0 @ supress further partial tag feed in 5937 
5938 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 block - mid 5939 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 5940.L256_enc_blocks_more_than_3: @ blocks left > 3 5941 5942 st1 { $res1b}, [$output_ptr], #16 @ AES final-3 block - store result 5943 5944 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 5945 ext $h4.16b, $h4.16b, $h4.16b, #8 5946 rev64 $res0b, $res1b @ GHASH final-3 block 5947 5948 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5949 5950 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 5951 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 5952 5953 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 5954 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 5955 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 5956 5957 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 5958 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-2 block - load plaintext 5959 5960 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 5961 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 5962 5963 eor3 $res1b, $ctr_t1b, $ctr5b, $t1.16b @ AES final-2 block - result 5964 movi $t0.8b, #0 @ supress further partial tag feed in 5965 5966 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 5967 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 5968.L256_enc_blocks_more_than_2: @ blocks left > 2 5969 5970 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 5971 ext $h3.16b, $h3.16b, $h3.16b, #8 5972 5973 st1 { $res1b}, [$output_ptr], #16 @ AES final-2 block - store result 5974 5975 rev64 $res0b, $res1b @ GHASH final-2 block 5976 ldr $ctr_t1q, [$input_ptr], #16 @ AES final-1 block - load plaintext 5977 5978 eor $res0b, $res0b, $t0.16b @ feed in partial tag 5979 5980 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 5981 5982 movi $t0.8b, #0 @ supress further partial tag feed in 5983 5984 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 5985 eor3 $res1b, $ctr_t1b, $ctr6b, $t1.16b @ AES final-1 
block - result 5986 5987 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 5988 5989 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 5990 5991 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 5992 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 5993 5994 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 5995 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-2 block - low 5996.L256_enc_blocks_more_than_1: @ blocks left > 1 5997 5998 st1 { $res1b}, [$output_ptr], #16 @ AES final-1 block - store result 5999 6000 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6001 ext $h2.16b, $h2.16b, $h2.16b, #8 6002 rev64 $res0b, $res1b @ GHASH final-1 block 6003 ldr $ctr_t1q, [$input_ptr], #16 @ AES final block - load plaintext 6004 6005 eor $res0b, $res0b, $t0.16b @ feed in partial tag 6006 movi $t0.8b, #0 @ supress further partial tag feed in 6007 6008 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 6009 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 6010 6011 eor3 $res1b, $ctr_t1b, $ctr7b, $t1.16b @ AES final block - result 6012 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 6013 6014 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 6015 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 6016 6017 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6018 6019 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 6020 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 6021 6022 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 6023 6024 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 6025.L256_enc_blocks_less_than_1: @ blocks left <= 1 6026 6027 and $bit_length, $bit_length, #127 @ bit_length %= 128 6028 6029 sub $bit_length, $bit_length, #128 @ bit_length -= 128 6030 6031 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 6032 6033 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 6034 and $bit_length, $bit_length, #127 @ 
bit_length %= 128 6035 6036 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 6037 cmp $bit_length, #64 6038 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 6039 6040 csel $temp3_x, $temp0_x, xzr, lt 6041 csel $temp2_x, $temp1_x, $temp0_x, lt 6042 6043 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 6044 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6045 ext $h1.16b, $h1.16b, $h1.16b, #8 6046 6047 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 6048 mov $ctr0.d[1], $temp3_x 6049 6050 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 6051 6052 rev64 $res0b, $res1b @ GHASH final block 6053 6054 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 6055 bif $res1b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 6056 str $rtmp_ctrq, [$counter] @ store the updated counter 6057 6058 eor $res0b, $res0b, $t0.16b @ feed in partial tag 6059 st1 { $res1b}, [$output_ptr] @ store all 16B 6060 6061 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 6062 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 6063 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 6064 6065 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 6066 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 6067 6068 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 6069 6070 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 6071 6072 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 6073 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6074 6075 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6076 6077 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6078 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6079 6080 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6081 6082 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b 
align with low 6083 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6084 6085 eor3 $acc_lb, $acc_lb, $acc_hb, $t11.16b @ MODULO - fold into low 6086 ext $acc_lb, $acc_lb, $acc_lb, #8 6087 rev64 $acc_lb, $acc_lb 6088 st1 { $acc_l.16b }, [$current_tag] 6089 lsr x0, $bit_length, #3 @ return sizes 6090 6091 ldp d10, d11, [sp, #16] 6092 ldp d12, d13, [sp, #32] 6093 ldp d14, d15, [sp, #48] 6094 ldp d8, d9, [sp], #80 6095 ret 6096 6097.L256_enc_ret: 6098 mov w0, #0x0 6099 ret 6100.size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel 6101___ 6102 6103{ 6104######################################################################################### 6105# size_t unroll8_eor3_aes_gcm_dec_256_kernel(const unsigned char *in, 6106# size_t len, 6107# unsigned char *out, 6108# const void *key, 6109# unsigned char ivec[16], 6110# u64 *Xi); 6111# 6112$code.=<<___; 6113.global unroll8_eor3_aes_gcm_dec_256_kernel 6114.type unroll8_eor3_aes_gcm_dec_256_kernel,%function 6115.align 4 6116unroll8_eor3_aes_gcm_dec_256_kernel: 6117 AARCH64_VALID_CALL_TARGET 6118 cbz x1, .L256_dec_ret 6119 stp d8, d9, [sp, #-80]! 
6120 mov $counter, x4 6121 mov $cc, x5 6122 stp d10, d11, [sp, #16] 6123 stp d12, d13, [sp, #32] 6124 stp d14, d15, [sp, #48] 6125 mov x5, #0xc200000000000000 6126 stp x5, xzr, [sp, #64] 6127 add $modulo_constant, sp, #64 6128 6129 ld1 { $ctr0b}, [$counter] @ CTR block 0 6130 6131 mov $constant_temp, #0x100000000 @ set up counter increment 6132 movi $rctr_inc.16b, #0x0 6133 mov $rctr_inc.d[1], $constant_temp 6134 lsr $main_end_input_ptr, $bit_length, #3 @ byte_len 6135 6136 sub $main_end_input_ptr, $main_end_input_ptr, #1 @ byte_len - 1 6137 6138 rev32 $rtmp_ctr.16b, $ctr0.16b @ set up reversed counter 6139 6140 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 0 6141 6142 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 1 6143 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 1 6144 6145 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 2 6146 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 2 6147 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6148 6149 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 3 6150 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 3 6151 6152 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 4 6153 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 4 6154 6155 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 0 6156 6157 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 5 6158 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 5 6159 6160 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 0 6161 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 0 6162 6163 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 6 6164 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 6 6165 6166 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 7 6167 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 0 6168 6169 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 0 6170 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 0 6171 6172 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES 
block 3 - round 0 6173 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 0 6174 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6175 6176 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 1 6177 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 1 6178 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 1 6179 6180 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 1 6181 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 1 6182 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 1 6183 6184 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 1 6185 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 1 6186 6187 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 2 6188 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 2 6189 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 2 6190 6191 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 2 6192 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 2 6193 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 2 6194 6195 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 2 6196 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 2 6197 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6198 6199 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 3 6200 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 3 6201 6202 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 3 6203 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 3 6204 6205 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 3 6206 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 3 6207 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 3 6208 6209 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 3 6210 6211 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ 
AES block 7 - round 4 6212 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 4 6213 6214 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 4 6215 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 4 6216 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 4 6217 6218 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 4 6219 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 4 6220 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 4 6221 6222 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 5 6223 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 5 6224 6225 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6226 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 5 6227 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 5 6228 6229 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 5 6230 6231 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 5 6232 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 5 6233 6234 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 5 6235 6236 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 6 6237 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 6 6238 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 6 6239 6240 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 6 6241 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 6 6242 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 6 6243 6244 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 6 6245 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 6 6246 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6247 6248 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 7 6249 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 7 6250 6251 aese $ctr3b, $rk7 \n aesmc $ctr3b, 
$ctr3b @ AES block 3 - round 7 6252 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 7 6253 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 7 6254 6255 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 7 6256 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 7 6257 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 7 6258 6259 and $main_end_input_ptr, $main_end_input_ptr, #0xffffffffffffff80 @ number of bytes to be processed in main loop (at least 1 byte must be handled by tail) 6260 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 8 6261 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 8 6262 6263 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 8 6264 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 8 6265 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 8 6266 6267 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 8 6268 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 8 6269 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 8 6270 6271 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 9 6272 6273 ld1 { $acc_lb}, [$current_tag] 6274 ext $acc_lb, $acc_lb, $acc_lb, #8 6275 rev64 $acc_lb, $acc_lb 6276 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6277 add $end_input_ptr, $input_ptr, $bit_length, lsr #3 @ end_input_ptr 6278 add $main_end_input_ptr, $main_end_input_ptr, $input_ptr 6279 6280 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 9 6281 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 9 6282 6283 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 9 6284 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 9 6285 6286 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 9 6287 6288 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 9 6289 aese $ctr1b, $rk9 \n aesmc $ctr1b, 
$ctr1b @ AES block 1 - round 9 6290 6291 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 10 6292 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 10 6293 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 10 6294 6295 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 10 6296 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 10 6297 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 10 6298 6299 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 10 6300 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 10 6301 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6302 6303 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 11 6304 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 7 6305 6306 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 11 6307 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 11 6308 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 11 6309 6310 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 11 6311 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 11 6312 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 11 6313 6314 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 11 6315 ldr $rk14q, [$cc, #224] @ load rk14 6316 6317 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 1 - round 12 6318 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 4 - round 12 6319 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 5 - round 12 6320 6321 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 6322 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 3 - round 12 6323 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 2 - round 12 6324 6325 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 6 - round 12 6326 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 0 - round 12 6327 
aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 7 - round 12 6328 6329 aese $ctr5b, $rk13 @ AES block 5 - round 13 6330 aese $ctr1b, $rk13 @ AES block 1 - round 13 6331 aese $ctr2b, $rk13 @ AES block 2 - round 13 6332 6333 aese $ctr0b, $rk13 @ AES block 0 - round 13 6334 aese $ctr4b, $rk13 @ AES block 4 - round 13 6335 aese $ctr6b, $rk13 @ AES block 6 - round 13 6336 6337 aese $ctr3b, $rk13 @ AES block 3 - round 13 6338 aese $ctr7b, $rk13 @ AES block 7 - round 13 6339 b.ge .L256_dec_tail @ handle tail 6340 6341 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 0, 1 - load ciphertext 6342 6343 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 2, 3 - load ciphertext 6344 6345 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 4, 5 - load ciphertext 6346 6347 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 6, 7 - load ciphertext 6348 cmp $input_ptr, $main_end_input_ptr @ check if we have <= 8 blocks 6349 6350 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 1 - result 6351 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 0 - result 6352 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 0, 1 - store result 6353 6354 rev32 $ctr0.16b, $rtmp_ctr.16b @ CTR block 8 6355 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8 6356 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 3 - result 6357 6358 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 5 - result 6359 6360 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 4 - result 6361 rev32 $ctr1.16b, $rtmp_ctr.16b @ CTR block 9 6362 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 9 6363 6364 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 2 - result 6365 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 2, 3 - store result 6366 6367 rev32 $ctr2.16b, $rtmp_ctr.16b @ CTR block 10 6368 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 10 6369 6370 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 6 - result 6371 6372 rev32 $ctr3.16b, $rtmp_ctr.16b @ CTR block 11 6373 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ 
CTR block 11 6374 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 4, 5 - store result 6375 6376 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 7 - result 6377 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 6, 7 - store result 6378 6379 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 12 6380 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 12 6381 b.ge .L256_dec_prepretail @ do prepretail 6382 6383.L256_dec_main_loop: @ main loop start 6384 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 6385 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6386 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 6387 6388 rev64 $res1b, $res1b @ GHASH block 8k+1 6389 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 6390 ext $h7.16b, $h7.16b, $h7.16b, #8 6391 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 6392 ext $h8.16b, $h8.16b, $h8.16b, #8 6393 6394 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 6395 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 6396 rev64 $res0b, $res0b @ GHASH block 8k 6397 6398 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 6399 rev64 $res4b, $res4b @ GHASH block 8k+4 6400 rev64 $res3b, $res3b @ GHASH block 8k+3 6401 6402 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 6403 rev64 $res7b, $res7b @ GHASH block 8k+7 6404 6405 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 6406 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 6407 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 6408 6409 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 6410 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 6411 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 6412 6413 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 6414 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 6415 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6416 6417 eor $res0b, $res0b, $acc_lb @ PRE 1 6418 ldr $h5q, 
[$current_tag, #128] @ load h5l | h5h 6419 ext $h5.16b, $h5.16b, $h5.16b, #8 6420 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 6421 ext $h6.16b, $h6.16b, $h6.16b, #8 6422 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 6423 6424 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 6425 rev64 $res2b, $res2b @ GHASH block 8k+2 6426 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 6427 6428 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 6429 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 6430 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 6431 6432 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6433 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 6434 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 6435 6436 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 6437 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 6438 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 6439 6440 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 6441 aese $ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 6442 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 6443 6444 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 6445 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 6446 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 6447 6448 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6449 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 6450 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 6451 6452 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 6453 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 6454 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 6455 6456 aese $ctr5b, $rk3 \n 
aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 6457 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 6458 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 6459 6460 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 6461 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 6462 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6463 6464 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 6465 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 6466 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 6467 6468 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 6469 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 6470 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 6471 6472 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 6473 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 6474 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 6475 6476 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 6477 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 6478 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 6479 6480 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 6481 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 6482 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 6483 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 6484 6485 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6486 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 6487 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 6488 6489 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 6490 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 6491 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 6492 6493 aese $ctr1b, $rk5 \n 
aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 6494 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 6495 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 6496 6497 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 6498 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6499 rev64 $res5b, $res5b @ GHASH block 8k+5 6500 6501 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 6502 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 6503 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6504 6505 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 6506 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 6507 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 6508 6509 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6510 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 6511 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 6512 6513 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6514 pmull $h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 6515 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 6516 6517 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 6518 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 6519 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 6520 6521 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 6522 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 6523 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 6524 6525 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 6526 ext $h3.16b, $h3.16b, $h3.16b, #8 6527 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 6528 ext $h4.16b, $h4.16b, $h4.16b, #8 6529 rev64 $res6b, $res6b @ GHASH block 8k+6 6530 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 6531 
6532 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 6533 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 6534 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6535 6536 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6537 ext $h1.16b, $h1.16b, $h1.16b, #8 6538 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6539 ext $h2.16b, $h2.16b, $h2.16b, #8 6540 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6541 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 6542 6543 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 6544 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 6545 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 6546 6547 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6548 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 6549 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 6550 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 7 6551 6552 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 6553 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 6554 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6555 6556 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 6557 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 6558 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 6559 6560 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 6561 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 6562 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 6563 6564 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 6565 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 6566 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 6567 6568 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6569 aese $ctr3b, $rk8 \n aesmc $ctr3b, 
$ctr3b @ AES block 8k+11 - round 8 6570 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 6571 6572 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6573 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 6574 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6575 6576 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 6577 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 6578 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 6579 6580 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 6581 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6582 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 6583 6584 ldp $res0q, $res1q, [$input_ptr], #32 @ AES block 8k+8, 8k+9 - load ciphertext 6585 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6586 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 6587 6588 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 6589 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 6590 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 6591 6592 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 6593 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 6594 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 6595 6596 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 - low 6597 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 6598 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 6599 6600 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 6601 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 6602 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 6603 6604 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 6605 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 
6606 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 6607 6608 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 6609 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 6610 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 6611 6612 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 6613 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 6614 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 6615 6616 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 6617 rev32 $h1.16b, $rtmp_ctr.16b @ CTR block 8k+16 6618 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6619 6620 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+16 6621 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 6622 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6623 6624 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 6625 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 6626 6627 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6628 rev32 $h2.16b, $rtmp_ctr.16b @ CTR block 8k+17 6629 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 6630 6631 ldp $res2q, $res3q, [$input_ptr], #32 @ AES block 8k+10, 8k+11 - load ciphertext 6632 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 6633 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6634 6635 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 6636 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+17 6637 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 11 6638 6639 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 6640 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 6641 aese $ctr6b, $rk12 \n aesmc $ctr6b, 
$ctr6b @ AES block 8k+14 - round 12 6642 6643 rev32 $h3.16b, $rtmp_ctr.16b @ CTR block 8k+18 6644 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+18 6645 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6646 6647 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6648 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 6649 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 6650 6651 ldr $rk14q, [$cc, #224] @ load rk14 6652 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 6653 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 6654 6655 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6656 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 6657 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 6658 6659 ldp $res4q, $res5q, [$input_ptr], #32 @ AES block 8k+12, 8k+13 - load ciphertext 6660 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 6661 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 6662 6663 ldp $res6q, $res7q, [$input_ptr], #32 @ AES block 8k+14, 8k+15 - load ciphertext 6664 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 6665 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 6666 6667 rev32 $h4.16b, $rtmp_ctr.16b @ CTR block 8k+19 6668 eor3 $ctr2b, $res2b, $ctr2b, $rk14 @ AES block 8k+10 - result 6669 eor3 $ctr1b, $res1b, $ctr1b, $rk14 @ AES block 8k+9 - result 6670 6671 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6672 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 6673 6674 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+19 6675 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 6676 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 6677 6678 eor3 $ctr5b, $res5b, $ctr5b, $rk14 @ AES block 8k+13 - result 6679 eor3 $ctr0b, $res0b, $ctr0b, $rk14 @ AES block 8k+8 - result 6680 aese $ctr3b, $rk13 @ AES block 
8k+11 - round 13 6681 6682 stp $ctr0q, $ctr1q, [$output_ptr], #32 @ AES block 8k+8, 8k+9 - store result 6683 mov $ctr0.16b, $h1.16b @ CTR block 8k+16 6684 eor3 $ctr4b, $res4b, $ctr4b, $rk14 @ AES block 8k+12 - result 6685 6686 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 6687 eor3 $ctr3b, $res3b, $ctr3b, $rk14 @ AES block 8k+11 - result 6688 stp $ctr2q, $ctr3q, [$output_ptr], #32 @ AES block 8k+10, 8k+11 - store result 6689 6690 mov $ctr3.16b, $h4.16b @ CTR block 8k+19 6691 mov $ctr2.16b, $h3.16b @ CTR block 8k+18 6692 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 6693 6694 mov $ctr1.16b, $h2.16b @ CTR block 8k+17 6695 stp $ctr4q, $ctr5q, [$output_ptr], #32 @ AES block 8k+12, 8k+13 - store result 6696 eor3 $ctr7b, $res7b, $ctr7b, $rk14 @ AES block 8k+15 - result 6697 6698 eor3 $ctr6b, $res6b, $ctr6b, $rk14 @ AES block 8k+14 - result 6699 rev32 $ctr4.16b, $rtmp_ctr.16b @ CTR block 8k+20 6700 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+20 6701 6702 cmp $input_ptr, $main_end_input_ptr @ LOOP CONTROL 6703 stp $ctr6q, $ctr7q, [$output_ptr], #32 @ AES block 8k+14, 8k+15 - store result 6704 b.lt .L256_dec_main_loop 6705 6706.L256_dec_prepretail: @ PREPRETAIL 6707 ldp $rk0q, $rk1q, [$cc, #0] @ load rk0, rk1 6708 rev32 $ctr5.16b, $rtmp_ctr.16b @ CTR block 8k+13 6709 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+13 6710 6711 rev64 $res4b, $res4b @ GHASH block 8k+4 6712 ldr $h56kq, [$current_tag, #144] @ load h6k | h5k 6713 ldr $h78kq, [$current_tag, #192] @ load h8k | h7k 6714 6715 rev32 $ctr6.16b, $rtmp_ctr.16b @ CTR block 8k+14 6716 rev64 $res0b, $res0b @ GHASH block 8k 6717 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+14 6718 6719 ext $acc_lb, $acc_lb, $acc_lb, #8 @ PRE 0 6720 ldr $h7q, [$current_tag, #176] @ load h7l | h7h 6721 ext $h7.16b, $h7.16b, $h7.16b, #8 6722 ldr $h8q, [$current_tag, #208] @ load h8l | h8h 6723 ext $h8.16b, $h8.16b, $h8.16b, #8 6724 rev64 $res1b, $res1b @ GHASH block 8k+1 6725 
6726 rev32 $ctr7.16b, $rtmp_ctr.16b @ CTR block 8k+15 6727 rev64 $res2b, $res2b @ GHASH block 8k+2 6728 ldr $h5q, [$current_tag, #128] @ load h5l | h5h 6729 ext $h5.16b, $h5.16b, $h5.16b, #8 6730 ldr $h6q, [$current_tag, #160] @ load h6l | h6h 6731 ext $h6.16b, $h6.16b, $h6.16b, #8 6732 6733 aese $ctr0b, $rk0 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 0 6734 aese $ctr1b, $rk0 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 0 6735 aese $ctr4b, $rk0 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 0 6736 6737 aese $ctr3b, $rk0 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 0 6738 aese $ctr5b, $rk0 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 0 6739 aese $ctr6b, $rk0 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 0 6740 6741 aese $ctr4b, $rk1 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 1 6742 aese $ctr7b, $rk0 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 0 6743 aese $ctr2b, $rk0 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 0 6744 6745 ldp $rk2q, $rk3q, [$cc, #32] @ load rk2, rk3 6746 aese $ctr0b, $rk1 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 1 6747 eor $res0b, $res0b, $acc_lb @ PRE 1 6748 6749 aese $ctr7b, $rk1 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 1 6750 aese $ctr6b, $rk1 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 1 6751 aese $ctr2b, $rk1 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 1 6752 6753 aese $ctr3b, $rk1 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 1 6754 aese $ctr1b, $rk1 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 1 6755 aese $ctr5b, $rk1 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 1 6756 6757 pmull2 $t0.1q, $res1.2d, $h7.2d @ GHASH block 8k+1 - high 6758 trn1 $acc_m.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6759 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH block 8k - low 6760 6761 rev64 $res3b, $res3b @ GHASH block 8k+3 6762 pmull $h7.1q, $res1.1d, $h7.1d @ GHASH block 8k+1 - low 6763 6764 aese $ctr5b, $rk2 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 2 6765 aese 
$ctr7b, $rk2 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 2 6766 aese $ctr1b, $rk2 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 2 6767 6768 aese $ctr3b, $rk2 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 2 6769 aese $ctr6b, $rk2 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 2 6770 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH block 8k - high 6771 6772 aese $ctr0b, $rk2 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 2 6773 aese $ctr7b, $rk3 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 3 6774 6775 aese $ctr5b, $rk3 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 3 6776 rev64 $res6b, $res6b @ GHASH block 8k+6 6777 6778 aese $ctr0b, $rk3 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 3 6779 aese $ctr2b, $rk2 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 2 6780 aese $ctr6b, $rk3 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 3 6781 6782 pmull2 $t1.1q, $res2.2d, $h6.2d @ GHASH block 8k+2 - high 6783 trn2 $res0.2d, $res1.2d, $res0.2d @ GHASH block 8k, 8k+1 - mid 6784 aese $ctr4b, $rk2 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 2 6785 6786 ldp $rk4q, $rk5q, [$cc, #64] @ load rk4, rk5 6787 aese $ctr1b, $rk3 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 3 6788 pmull2 $t2.1q, $res3.2d, $h5.2d @ GHASH block 8k+3 - high 6789 6790 aese $ctr2b, $rk3 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 3 6791 eor $acc_hb, $acc_hb, $t0.16b @ GHASH block 8k+1 - high 6792 eor $res0.16b, $res0.16b, $acc_m.16b @ GHASH block 8k, 8k+1 - mid 6793 6794 aese $ctr4b, $rk3 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 3 6795 pmull $h6.1q, $res2.1d, $h6.1d @ GHASH block 8k+2 - low 6796 aese $ctr3b, $rk3 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 3 6797 6798 eor3 $acc_hb, $acc_hb, $t1.16b, $t2.16b @ GHASH block 8k+2, 8k+3 - high 6799 trn1 $t3.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6800 trn2 $res2.2d, $res3.2d, $res2.2d @ GHASH block 8k+2, 8k+3 - mid 6801 6802 pmull2 $acc_m.1q, $res0.2d, $h78k.2d @ GHASH block 8k - mid 6803 pmull 
$h5.1q, $res3.1d, $h5.1d @ GHASH block 8k+3 - low 6804 eor $acc_lb, $acc_lb, $h7.16b @ GHASH block 8k+1 - low 6805 6806 pmull $h78k.1q, $res0.1d, $h78k.1d @ GHASH block 8k+1 - mid 6807 aese $ctr5b, $rk4 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 4 6808 aese $ctr0b, $rk4 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 4 6809 6810 eor3 $acc_lb, $acc_lb, $h6.16b, $h5.16b @ GHASH block 8k+2, 8k+3 - low 6811 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 6812 ext $h1.16b, $h1.16b, $h1.16b, #8 6813 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 6814 ext $h2.16b, $h2.16b, $h2.16b, #8 6815 aese $ctr7b, $rk4 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 4 6816 6817 aese $ctr2b, $rk4 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 4 6818 aese $ctr6b, $rk4 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 4 6819 eor $acc_mb, $acc_mb, $h78k.16b @ GHASH block 8k+1 - mid 6820 6821 eor $res2.16b, $res2.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6822 aese $ctr7b, $rk5 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 5 6823 aese $ctr1b, $rk4 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 4 6824 6825 aese $ctr2b, $rk5 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 5 6826 aese $ctr3b, $rk4 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 4 6827 aese $ctr4b, $rk4 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 4 6828 6829 aese $ctr1b, $rk5 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 5 6830 pmull2 $t3.1q, $res2.2d, $h56k.2d @ GHASH block 8k+2 - mid 6831 aese $ctr6b, $rk5 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 5 6832 6833 aese $ctr4b, $rk5 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 5 6834 aese $ctr3b, $rk5 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 5 6835 pmull $h56k.1q, $res2.1d, $h56k.1d @ GHASH block 8k+3 - mid 6836 6837 aese $ctr0b, $rk5 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 5 6838 aese $ctr5b, $rk5 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 5 6839 ldp $rk6q, $rk7q, [$cc, #96] @ load rk6, rk7 6840 6841 ldr $h3q, 
[$current_tag, #80] @ load h3l | h3h 6842 ext $h3.16b, $h3.16b, $h3.16b, #8 6843 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 6844 ext $h4.16b, $h4.16b, $h4.16b, #8 6845 rev64 $res7b, $res7b @ GHASH block 8k+7 6846 rev64 $res5b, $res5b @ GHASH block 8k+5 6847 6848 eor3 $acc_mb, $acc_mb, $h56k.16b, $t3.16b @ GHASH block 8k+2, 8k+3 - mid 6849 6850 trn1 $t6.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6851 6852 aese $ctr0b, $rk6 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 6 6853 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 6854 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 6855 aese $ctr6b, $rk6 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 6 6856 6857 aese $ctr5b, $rk6 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 6 6858 aese $ctr7b, $rk6 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 6 6859 6860 pmull2 $t4.1q, $res4.2d, $h4.2d @ GHASH block 8k+4 - high 6861 pmull2 $t5.1q, $res5.2d, $h3.2d @ GHASH block 8k+5 - high 6862 pmull $h4.1q, $res4.1d, $h4.1d @ GHASH block 8k+4 - low 6863 6864 trn2 $res4.2d, $res5.2d, $res4.2d @ GHASH block 8k+4, 8k+5 - mid 6865 pmull $h3.1q, $res5.1d, $h3.1d @ GHASH block 8k+5 - low 6866 trn1 $t9.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6867 6868 aese $ctr7b, $rk7 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 7 6869 pmull2 $t7.1q, $res6.2d, $h2.2d @ GHASH block 8k+6 - high 6870 aese $ctr1b, $rk6 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 6 6871 6872 aese $ctr2b, $rk6 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 6 6873 aese $ctr3b, $rk6 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 6 6874 aese $ctr4b, $rk6 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 6 6875 6876 ldp $rk8q, $rk9q, [$cc, #128] @ load rk8, rk9 6877 pmull $h2.1q, $res6.1d, $h2.1d @ GHASH block 8k+6 - low 6878 aese $ctr5b, $rk7 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 7 6879 6880 aese $ctr1b, $rk7 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 7 6881 aese $ctr4b, $rk7 \n aesmc $ctr4b, $ctr4b @ AES 
block 8k+12 - round 7 6882 6883 aese $ctr6b, $rk7 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 7 6884 aese $ctr2b, $rk7 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 7 6885 eor3 $acc_hb, $acc_hb, $t4.16b, $t5.16b @ GHASH block 8k+4, 8k+5 - high 6886 6887 aese $ctr0b, $rk7 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 7 6888 trn2 $res6.2d, $res7.2d, $res6.2d @ GHASH block 8k+6, 8k+7 - mid 6889 aese $ctr3b, $rk7 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 7 6890 6891 aese $ctr0b, $rk8 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 8 6892 aese $ctr7b, $rk8 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 8 6893 aese $ctr4b, $rk8 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 8 6894 6895 aese $ctr1b, $rk8 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 8 6896 aese $ctr5b, $rk8 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 8 6897 aese $ctr6b, $rk8 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 8 6898 6899 aese $ctr3b, $rk8 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 8 6900 aese $ctr4b, $rk9 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 9 6901 eor $res4.16b, $res4.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6902 6903 aese $ctr0b, $rk9 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 9 6904 aese $ctr1b, $rk9 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 9 6905 eor $res6.16b, $res6.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6906 6907 aese $ctr6b, $rk9 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 9 6908 aese $ctr7b, $rk9 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 9 6909 pmull2 $t6.1q, $res4.2d, $h34k.2d @ GHASH block 8k+4 - mid 6910 6911 aese $ctr2b, $rk8 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 8 6912 pmull $h34k.1q, $res4.1d, $h34k.1d @ GHASH block 8k+5 - mid 6913 pmull2 $t8.1q, $res7.2d, $h1.2d @ GHASH block 8k+7 - high 6914 6915 pmull2 $t9.1q, $res6.2d, $h12k.2d @ GHASH block 8k+6 - mid 6916 pmull $h12k.1q, $res6.1d, $h12k.1d @ GHASH block 8k+7 - mid 6917 pmull $h1.1q, $res7.1d, $h1.1d @ GHASH block 8k+7 
- low 6918 6919 ldp $rk10q, $rk11q, [$cc, #160] @ load rk10, rk11 6920 eor3 $acc_lb, $acc_lb, $h4.16b, $h3.16b @ GHASH block 8k+4, 8k+5 - low 6921 eor3 $acc_mb, $acc_mb, $h34k.16b, $t6.16b @ GHASH block 8k+4, 8k+5 - mid 6922 6923 aese $ctr2b, $rk9 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 9 6924 aese $ctr3b, $rk9 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 9 6925 aese $ctr5b, $rk9 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 9 6926 6927 eor3 $acc_hb, $acc_hb, $t7.16b, $t8.16b @ GHASH block 8k+6, 8k+7 - high 6928 eor3 $acc_lb, $acc_lb, $h2.16b, $h1.16b @ GHASH block 8k+6, 8k+7 - low 6929 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 6930 6931 eor3 $acc_mb, $acc_mb, $h12k.16b, $t9.16b @ GHASH block 8k+6, 8k+7 - mid 6932 6933 aese $ctr4b, $rk10 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 10 6934 aese $ctr6b, $rk10 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 10 6935 aese $ctr5b, $rk10 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 10 6936 6937 aese $ctr0b, $rk10 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 10 6938 aese $ctr2b, $rk10 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 10 6939 aese $ctr3b, $rk10 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 10 6940 6941 eor3 $acc_mb, $acc_mb, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 6942 6943 aese $ctr7b, $rk10 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 10 6944 aese $ctr1b, $rk10 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 10 6945 ldp $rk12q, $rk13q, [$cc, #192] @ load rk12, rk13 6946 6947 ext $t11.16b, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 6948 6949 aese $ctr2b, $rk11 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 11 6950 aese $ctr1b, $rk11 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 11 6951 aese $ctr0b, $rk11 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 11 6952 6953 pmull $t12.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 6954 aese $ctr3b, $rk11 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - 
round 11 6955 6956 aese $ctr7b, $rk11 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 11 6957 aese $ctr6b, $rk11 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 11 6958 aese $ctr4b, $rk11 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 11 6959 6960 aese $ctr5b, $rk11 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 11 6961 aese $ctr3b, $rk12 \n aesmc $ctr3b, $ctr3b @ AES block 8k+11 - round 12 6962 6963 eor3 $acc_mb, $acc_mb, $t12.16b, $t11.16b @ MODULO - fold into mid 6964 6965 aese $ctr3b, $rk13 @ AES block 8k+11 - round 13 6966 aese $ctr2b, $rk12 \n aesmc $ctr2b, $ctr2b @ AES block 8k+10 - round 12 6967 aese $ctr6b, $rk12 \n aesmc $ctr6b, $ctr6b @ AES block 8k+14 - round 12 6968 6969 pmull $acc_h.1q, $acc_m.1d, $mod_constant.1d @ MODULO - mid 64b align with low 6970 aese $ctr4b, $rk12 \n aesmc $ctr4b, $ctr4b @ AES block 8k+12 - round 12 6971 aese $ctr7b, $rk12 \n aesmc $ctr7b, $ctr7b @ AES block 8k+15 - round 12 6972 6973 aese $ctr0b, $rk12 \n aesmc $ctr0b, $ctr0b @ AES block 8k+8 - round 12 6974 ldr $rk14q, [$cc, #224] @ load rk14 6975 aese $ctr1b, $rk12 \n aesmc $ctr1b, $ctr1b @ AES block 8k+9 - round 12 6976 6977 aese $ctr4b, $rk13 @ AES block 8k+12 - round 13 6978 ext $t11.16b, $acc_mb, $acc_mb, #8 @ MODULO - other mid alignment 6979 aese $ctr5b, $rk12 \n aesmc $ctr5b, $ctr5b @ AES block 8k+13 - round 12 6980 6981 aese $ctr6b, $rk13 @ AES block 8k+14 - round 13 6982 aese $ctr2b, $rk13 @ AES block 8k+10 - round 13 6983 aese $ctr1b, $rk13 @ AES block 8k+9 - round 13 6984 6985 aese $ctr5b, $rk13 @ AES block 8k+13 - round 13 6986 eor3 $acc_lb, $acc_lb, $t11.16b, $acc_hb @ MODULO - fold into low 6987 add $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s @ CTR block 8k+15 6988 6989 aese $ctr7b, $rk13 @ AES block 8k+15 - round 13 6990 aese $ctr0b, $rk13 @ AES block 8k+8 - round 13 6991.L256_dec_tail: @ TAIL 6992 6993 ext $t0.16b, $acc_lb, $acc_lb, #8 @ prepare final partial tag 6994 sub $main_end_input_ptr, $end_input_ptr, $input_ptr @ main_end_input_ptr is number of 
bytes left to process 6995 cmp $main_end_input_ptr, #112 6996 6997 ldr $res1q, [$input_ptr], #16 @ AES block 8k+8 - load ciphertext 6998 6999 ldp $h78kq, $h8q, [$current_tag, #192] @ load h8k | h7k 7000 ext $h8.16b, $h8.16b, $h8.16b, #8 7001 mov $t1.16b, $rk14 7002 7003 ldp $h5q, $h56kq, [$current_tag, #128] @ load h5l | h5h 7004 ext $h5.16b, $h5.16b, $h5.16b, #8 7005 7006 eor3 $res4b, $res1b, $ctr0b, $t1.16b @ AES block 8k+8 - result 7007 ldp $h6q, $h7q, [$current_tag, #160] @ load h6l | h6h 7008 ext $h6.16b, $h6.16b, $h6.16b, #8 7009 ext $h7.16b, $h7.16b, $h7.16b, #8 7010 b.gt .L256_dec_blocks_more_than_7 7011 7012 mov $ctr7b, $ctr6b 7013 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7014 mov $ctr6b, $ctr5b 7015 7016 mov $ctr5b, $ctr4b 7017 mov $ctr4b, $ctr3b 7018 movi $acc_l.8b, #0 7019 7020 movi $acc_h.8b, #0 7021 movi $acc_m.8b, #0 7022 mov $ctr3b, $ctr2b 7023 7024 cmp $main_end_input_ptr, #96 7025 mov $ctr2b, $ctr1b 7026 b.gt .L256_dec_blocks_more_than_6 7027 7028 mov $ctr7b, $ctr6b 7029 mov $ctr6b, $ctr5b 7030 7031 mov $ctr5b, $ctr4b 7032 cmp $main_end_input_ptr, #80 7033 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7034 7035 mov $ctr4b, $ctr3b 7036 mov $ctr3b, $ctr1b 7037 b.gt .L256_dec_blocks_more_than_5 7038 7039 cmp $main_end_input_ptr, #64 7040 mov $ctr7b, $ctr6b 7041 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7042 7043 mov $ctr6b, $ctr5b 7044 7045 mov $ctr5b, $ctr4b 7046 mov $ctr4b, $ctr1b 7047 b.gt .L256_dec_blocks_more_than_4 7048 7049 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7050 mov $ctr7b, $ctr6b 7051 cmp $main_end_input_ptr, #48 7052 7053 mov $ctr6b, $ctr5b 7054 mov $ctr5b, $ctr1b 7055 b.gt .L256_dec_blocks_more_than_3 7056 7057 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 7058 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7059 mov $ctr7b, $ctr6b 7060 7061 cmp $main_end_input_ptr, #32 7062 mov $ctr6b, $ctr1b 7063 b.gt .L256_dec_blocks_more_than_2 7064 7065 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7066 7067 mov $ctr7b, $ctr1b 7068 cmp 
$main_end_input_ptr, #16 7069 b.gt .L256_dec_blocks_more_than_1 7070 7071 sub $rtmp_ctr.4s, $rtmp_ctr.4s, $rctr_inc.4s 7072 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 7073 b .L256_dec_blocks_less_than_1 7074.L256_dec_blocks_more_than_7: @ blocks left > 7 7075 rev64 $res0b, $res1b @ GHASH final-7 block 7076 ldr $res1q, [$input_ptr], #16 @ AES final-6 block - load ciphertext 7077 st1 { $res4b}, [$output_ptr], #16 @ AES final-7 block - store result 7078 7079 ins $acc_m.d[0], $h78k.d[1] @ GHASH final-7 block - mid 7080 7081 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7082 7083 ins $rk4v.d[0], $res0.d[1] @ GHASH final-7 block - mid 7084 eor3 $res4b, $res1b, $ctr1b, $t1.16b @ AES final-6 block - result 7085 7086 pmull2 $acc_h.1q, $res0.2d, $h8.2d @ GHASH final-7 block - high 7087 7088 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-7 block - mid 7089 movi $t0.8b, #0 @ supress further partial tag feed in 7090 7091 pmull $acc_l.1q, $res0.1d, $h8.1d @ GHASH final-7 block - low 7092 pmull $acc_m.1q, $rk4v.1d, $acc_m.1d @ GHASH final-7 block - mid 7093.L256_dec_blocks_more_than_6: @ blocks left > 6 7094 7095 rev64 $res0b, $res1b @ GHASH final-6 block 7096 7097 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7098 ldr $res1q, [$input_ptr], #16 @ AES final-5 block - load ciphertext 7099 movi $t0.8b, #0 @ supress further partial tag feed in 7100 7101 ins $rk4v.d[0], $res0.d[1] @ GHASH final-6 block - mid 7102 st1 { $res4b}, [$output_ptr], #16 @ AES final-6 block - store result 7103 pmull2 $rk2q1, $res0.2d, $h7.2d @ GHASH final-6 block - high 7104 7105 pmull $rk3q1, $res0.1d, $h7.1d @ GHASH final-6 block - low 7106 7107 eor3 $res4b, $res1b, $ctr2b, $t1.16b @ AES final-5 block - result 7108 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-6 block - low 7109 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-6 block - mid 7110 7111 pmull $rk4v.1q, $rk4v.1d, $h78k.1d @ GHASH final-6 block - mid 7112 7113 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-6 block - mid 7114 eor $acc_hb, 
$acc_hb, $rk2 @ GHASH final-6 block - high 7115.L256_dec_blocks_more_than_5: @ blocks left > 5 7116 7117 rev64 $res0b, $res1b @ GHASH final-5 block 7118 7119 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7120 7121 pmull2 $rk2q1, $res0.2d, $h6.2d @ GHASH final-5 block - high 7122 ins $rk4v.d[0], $res0.d[1] @ GHASH final-5 block - mid 7123 7124 ldr $res1q, [$input_ptr], #16 @ AES final-4 block - load ciphertext 7125 7126 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-5 block - mid 7127 st1 { $res4b}, [$output_ptr], #16 @ AES final-5 block - store result 7128 7129 pmull $rk3q1, $res0.1d, $h6.1d @ GHASH final-5 block - low 7130 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-5 block - mid 7131 7132 pmull2 $rk4v.1q, $rk4v.2d, $h56k.2d @ GHASH final-5 block - mid 7133 7134 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-5 block - high 7135 eor3 $res4b, $res1b, $ctr3b, $t1.16b @ AES final-4 block - result 7136 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-5 block - low 7137 7138 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-5 block - mid 7139 movi $t0.8b, #0 @ supress further partial tag feed in 7140.L256_dec_blocks_more_than_4: @ blocks left > 4 7141 7142 rev64 $res0b, $res1b @ GHASH final-4 block 7143 7144 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7145 7146 ins $rk4v.d[0], $res0.d[1] @ GHASH final-4 block - mid 7147 ldr $res1q, [$input_ptr], #16 @ AES final-3 block - load ciphertext 7148 7149 movi $t0.8b, #0 @ supress further partial tag feed in 7150 7151 pmull $rk3q1, $res0.1d, $h5.1d @ GHASH final-4 block - low 7152 pmull2 $rk2q1, $res0.2d, $h5.2d @ GHASH final-4 block - high 7153 7154 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-4 block - mid 7155 7156 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-4 block - high 7157 7158 pmull $rk4v.1q, $rk4v.1d, $h56k.1d @ GHASH final-4 block - mid 7159 7160 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-4 block - low 7161 st1 { $res4b}, [$output_ptr], #16 @ AES final-4 block - store result 7162 7163 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-4 
block - mid 7164 eor3 $res4b, $res1b, $ctr4b, $t1.16b @ AES final-3 block - result 7165.L256_dec_blocks_more_than_3: @ blocks left > 3 7166 7167 ldr $h4q, [$current_tag, #112] @ load h4l | h4h 7168 ext $h4.16b, $h4.16b, $h4.16b, #8 7169 rev64 $res0b, $res1b @ GHASH final-3 block 7170 7171 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7172 ldr $res1q, [$input_ptr], #16 @ AES final-2 block - load ciphertext 7173 ldr $h34kq, [$current_tag, #96] @ load h4k | h3k 7174 7175 ins $rk4v.d[0], $res0.d[1] @ GHASH final-3 block - mid 7176 st1 { $res4b}, [$output_ptr], #16 @ AES final-3 block - store result 7177 7178 eor3 $res4b, $res1b, $ctr5b, $t1.16b @ AES final-2 block - result 7179 7180 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-3 block - mid 7181 7182 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-3 block - mid 7183 pmull $rk3q1, $res0.1d, $h4.1d @ GHASH final-3 block - low 7184 pmull2 $rk2q1, $res0.2d, $h4.2d @ GHASH final-3 block - high 7185 7186 movi $t0.8b, #0 @ supress further partial tag feed in 7187 pmull2 $rk4v.1q, $rk4v.2d, $h34k.2d @ GHASH final-3 block - mid 7188 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-3 block - low 7189 7190 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-3 block - high 7191 7192 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-3 block - mid 7193.L256_dec_blocks_more_than_2: @ blocks left > 2 7194 7195 rev64 $res0b, $res1b @ GHASH final-2 block 7196 7197 ldr $h3q, [$current_tag, #80] @ load h3l | h3h 7198 ext $h3.16b, $h3.16b, $h3.16b, #8 7199 ldr $res1q, [$input_ptr], #16 @ AES final-1 block - load ciphertext 7200 7201 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7202 7203 ins $rk4v.d[0], $res0.d[1] @ GHASH final-2 block - mid 7204 7205 pmull $rk3q1, $res0.1d, $h3.1d @ GHASH final-2 block - low 7206 st1 { $res4b}, [$output_ptr], #16 @ AES final-2 block - store result 7207 eor3 $res4b, $res1b, $ctr6b, $t1.16b @ AES final-1 block - result 7208 7209 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-2 block - mid 7210 eor $acc_lb, $acc_lb, $rk3 @ 
GHASH final-2 block - low 7211 movi $t0.8b, #0 @ supress further partial tag feed in 7212 7213 pmull $rk4v.1q, $rk4v.1d, $h34k.1d @ GHASH final-2 block - mid 7214 pmull2 $rk2q1, $res0.2d, $h3.2d @ GHASH final-2 block - high 7215 7216 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-2 block - mid 7217 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-2 block - high 7218.L256_dec_blocks_more_than_1: @ blocks left > 1 7219 7220 rev64 $res0b, $res1b @ GHASH final-1 block 7221 7222 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7223 7224 ins $rk4v.d[0], $res0.d[1] @ GHASH final-1 block - mid 7225 ldr $h2q, [$current_tag, #64] @ load h2l | h2h 7226 ext $h2.16b, $h2.16b, $h2.16b, #8 7227 7228 eor $rk4v.8b, $rk4v.8b, $res0.8b @ GHASH final-1 block - mid 7229 ldr $res1q, [$input_ptr], #16 @ AES final block - load ciphertext 7230 st1 { $res4b}, [$output_ptr], #16 @ AES final-1 block - store result 7231 7232 ldr $h12kq, [$current_tag, #48] @ load h2k | h1k 7233 pmull $rk3q1, $res0.1d, $h2.1d @ GHASH final-1 block - low 7234 7235 ins $rk4v.d[1], $rk4v.d[0] @ GHASH final-1 block - mid 7236 7237 eor $acc_lb, $acc_lb, $rk3 @ GHASH final-1 block - low 7238 7239 eor3 $res4b, $res1b, $ctr7b, $t1.16b @ AES final block - result 7240 pmull2 $rk2q1, $res0.2d, $h2.2d @ GHASH final-1 block - high 7241 7242 pmull2 $rk4v.1q, $rk4v.2d, $h12k.2d @ GHASH final-1 block - mid 7243 7244 movi $t0.8b, #0 @ supress further partial tag feed in 7245 eor $acc_hb, $acc_hb, $rk2 @ GHASH final-1 block - high 7246 7247 eor $acc_mb, $acc_mb, $rk4v.16b @ GHASH final-1 block - mid 7248.L256_dec_blocks_less_than_1: @ blocks left <= 1 7249 7250 ld1 { $rk0}, [$output_ptr] @ load existing bytes where the possibly partial last block is to be stored 7251 mvn $temp0_x, xzr @ temp0_x = 0xffffffffffffffff 7252 and $bit_length, $bit_length, #127 @ bit_length %= 128 7253 7254 sub $bit_length, $bit_length, #128 @ bit_length -= 128 7255 rev32 $rtmp_ctr.16b, $rtmp_ctr.16b 7256 str $rtmp_ctrq, [$counter] @ store the updated counter 
7257 7258 neg $bit_length, $bit_length @ bit_length = 128 - #bits in input (in range [1,128]) 7259 7260 and $bit_length, $bit_length, #127 @ bit_length %= 128 7261 7262 lsr $temp0_x, $temp0_x, $bit_length @ temp0_x is mask for top 64b of last block 7263 cmp $bit_length, #64 7264 mvn $temp1_x, xzr @ temp1_x = 0xffffffffffffffff 7265 7266 csel $temp3_x, $temp0_x, xzr, lt 7267 csel $temp2_x, $temp1_x, $temp0_x, lt 7268 7269 mov $ctr0.d[0], $temp2_x @ ctr0b is mask for last block 7270 mov $ctr0.d[1], $temp3_x 7271 7272 and $res1b, $res1b, $ctr0b @ possibly partial last block has zeroes in highest bits 7273 ldr $h1q, [$current_tag, #32] @ load h1l | h1h 7274 ext $h1.16b, $h1.16b, $h1.16b, #8 7275 bif $res4b, $rk0, $ctr0b @ insert existing bytes in top end of result before storing 7276 7277 rev64 $res0b, $res1b @ GHASH final block 7278 7279 eor $res0b, $res0b, $t0.16b @ feed in partial tag 7280 7281 ins $t0.d[0], $res0.d[1] @ GHASH final block - mid 7282 pmull2 $rk2q1, $res0.2d, $h1.2d @ GHASH final block - high 7283 7284 eor $t0.8b, $t0.8b, $res0.8b @ GHASH final block - mid 7285 7286 pmull $rk3q1, $res0.1d, $h1.1d @ GHASH final block - low 7287 eor $acc_hb, $acc_hb, $rk2 @ GHASH final block - high 7288 7289 pmull $t0.1q, $t0.1d, $h12k.1d @ GHASH final block - mid 7290 7291 eor $acc_mb, $acc_mb, $t0.16b @ GHASH final block - mid 7292 ldr $mod_constantd, [$modulo_constant] @ MODULO - load modulo constant 7293 eor $acc_lb, $acc_lb, $rk3 @ GHASH final block - low 7294 7295 pmull $t11.1q, $acc_h.1d, $mod_constant.1d @ MODULO - top 64b align with mid 7296 eor $t10.16b, $acc_hb, $acc_lb @ MODULO - karatsuba tidy up 7297 7298 ext $acc_hb, $acc_hb, $acc_hb, #8 @ MODULO - other top alignment 7299 st1 { $res4b}, [$output_ptr] @ store all 16B 7300 7301 eor $acc_mb, $acc_mb, $t10.16b @ MODULO - karatsuba tidy up 7302 7303 eor $t11.16b, $acc_hb, $t11.16b @ MODULO - fold into mid 7304 eor $acc_mb, $acc_mb, $t11.16b @ MODULO - fold into mid 7305 7306 pmull $acc_h.1q, $acc_m.1d, 
$mod_constant.1d		@ MODULO - mid 64b align with low

	ext	$acc_mb, $acc_mb, $acc_mb, #8			@ MODULO - other mid alignment
	eor	$acc_lb, $acc_lb, $acc_hb			@ MODULO - fold into low

	eor	$acc_lb, $acc_lb, $acc_mb			@ MODULO - fold into low
	ext	$acc_lb, $acc_lb, $acc_lb, #8
	rev64	$acc_lb, $acc_lb
	st1	{ $acc_l.16b }, [$current_tag]
	lsr	x0, $bit_length, #3				@ return sizes

	ldp	d10, d11, [sp, #16]
	ldp	d12, d13, [sp, #32]
	ldp	d14, d15, [sp, #48]
	ldp	d8, d9, [sp], #80
	ret

.L256_dec_ret:
	mov	w0, #0x0
	ret
.size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel
___
}
}

$code.=<<___;
.asciz "AES GCM module for ARMv8, SPDX BSD-3-Clause by <xiaokang.qian\@arm.com>"
.align 2
#endif
___

# Post-processing pass: walk the accumulated assembly text in $code line by
# line, normalise comments, expand `...` expressions, hand-encode the
# rax1/eor3/bcax/xar instructions as raw .inst words, and print the result.
{
    # Base instruction words for the mnemonics that unsha3() encodes by hand.
    # unsha3() ORs the register numbers / immediate into these values.
    my %opcode = (
        "rax1" => 0xce608c00, "eor3" => 0xce000000,
        "bcax" => 0xce200000, "xar"  => 0xce800000 );

    # unsha3($mnemonic, $arg)
    #
    # Turn one instruction into a ".inst 0x........" directive, keeping the
    # original text as a trailing "//" comment.
    #
    #   $mnemonic - one of the keys of %opcode
    #   $arg      - the operand text following the mnemonic (it may carry a
    #               trailing comment, which ends up in the emitted comment)
    #
    # Field placement, as established by the shifts below:
    #   operand 1 -> bits 0 and up,  operand 2 -> bits 5 and up,
    #   operand 3 -> bits 16 and up, operand 4 (register or #imm) -> bits 10 and up
    #
    # Returns the ".inst" line.  When the mnemonic is unknown or fewer than
    # two register operands can be parsed, the instruction is returned as
    # plain "mnemonic<TAB>operands" text instead of a false value: the caller
    # uses the result as an s///e replacement, so a false value would erase
    # the instruction from the generated assembly without any diagnostic.
    sub unsha3 {
        my ($mnemonic,$arg)=@_;

        if (exists $opcode{$mnemonic} &&
            $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv#]([0-9\-]+))?)?/) {
            # Copy the captures before anything else can disturb them.
            my ($rd,$rn,$rm,$op4) = ($1,$2,$3,$4);

            # Operands 3 and 4 are optional in the pattern; absent ones
            # contribute 0 to the instruction word.
            $rm = 0 unless defined $rm;

            # The 4th operand may be a small arithmetic expression such as
            # "64-8"; the pattern restricts it to digits and '-', so the
            # string eval cannot run arbitrary code.
            my $ra_or_imm = defined($op4) ? eval($op4) : 0;
            $ra_or_imm = 0 unless defined $ra_or_imm;

            return sprintf ".inst\t0x%08x\t//%s %s",
                $opcode{$mnemonic}|$rd|($rn<<5)|($rm<<16)|($ra_or_imm<<10),
                $mnemonic,$arg;
        }

        # Could not encode: keep the instruction visible to the assembler.
        return "$mnemonic\t$arg";
    }

    # unvmov($arg)
    #
    # Rewrite an operand pair of the form "qN#lo|hi, qM#lo|hi" into an
    # "ins vX.d[i],vY.d[j]" instruction; register numbers 8 and above are
    # shifted up by 8, and lo/hi select element 0/1.  Returns a false value
    # when $arg does not have that form.
    # NOTE(review): not referenced by the loop below in the visible code.
    sub unvmov {
        my $arg=shift;

        $arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
        sprintf "ins	v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
                                             $3<8?$3:$3+8,($4 eq "lo")?0:1;
    }

    foreach(split("\n",$code)) {
        s/@\s/\/\//o;                   # old->new style commentary
        s/\`([^\`]*)\`/eval($1)/ge;     # expand `...` as a Perl expression

        # ld1r lines get their .16b arrangement rewritten to .2d; every other
        # line (and any ld1r line without .16b) is checked for the
        # instructions that must be emitted as raw .inst words.
        unless (m/\bld1r\b/ and s/\.16b/.2d/g) {
            s/\b(eor3|rax1|xar|bcax)\s+(v.*)/unsha3($1,$2)/ge;
        }
        print $_,"\n";
    }
}

close STDOUT or die "error closing STDOUT: $!"; # enforce flush