#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# AES for s390x.

# April 2007.
#
# Software performance improvement over gcc-generated code is ~70% and
# in absolute terms is ~73 cycles per byte processed with 128-bit key.
# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
# *strictly* in-order execution and issued instruction [in this case
# load value from memory is critical] has to complete before execution
# flow proceeds. S-boxes are compressed to 2KB[+256B].
#
# As for hardware acceleration support. It's basically a "teaser," as
# it can and should be improved in several ways. Most notably support
# for CBC is not utilized, nor multiple blocks are ever processed.
# Then software key schedule can be postponed till hardware support
# detection... Performance improvement over assembler is reportedly
# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
# support is implemented.

# May 2007.
#
# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
# for 128-bit keys, if hardware support is detected.

# January 2009.
#
# Add support for hardware AES192/256 and reschedule instructions to
# minimize/avoid Address Generation Interlock hazard and to favour
# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
# almost 50% on z9. The gain is smaller on z10, because being dual-
# issue z10 makes it impossible to eliminate the interlock condition:
# critical path is not long enough. Yet it spends ~24 cycles per byte
# processed with 128-bit key.
#
# Unlike previous version hardware support detection takes place only
# at the moment of key schedule setup, which is denoted in key->rounds.
# This is done, because deferred key setup can't be made MT-safe, not
# for keys longer than 128 bits.
#
# Add AES_cbc_encrypt, which gives incredible performance improvement,
# it was measured to be ~6.6x. It's less than previously mentioned 8x,
# because software implementation was optimized.

# May 2010.
#
# Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
# performance improvement over "generic" counter mode routine relying
# on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
# to the fact that exact throughput value depends on current stack
# frame alignment within 4KB page. In worst case you get ~75% of the
# maximum, but *on average* it would be as much as ~98%. Meaning that
# the worst case is unlikely; it's like hitting a ravine on a plateau.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 2x better than code generated by gcc 4.3.

# December 2010.
81# 82# Add support for z196 "cipher message with counter" instruction. 83# Note however that it's disengaged, because it was measured to 84# perform ~12% worse than vanilla km-based code... 85 86# February 2011. 87# 88# Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes 89# instructions, which deliver ~70% improvement at 8KB block size over 90# vanilla km-based code, 37% - at most like 512-bytes block size. 91 92# $output is the last argument if it looks like a file (it has an extension) 93# $flavour is the first argument if it doesn't look like a file 94$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; 95$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; 96 97if ($flavour =~ /3[12]/) { 98 $SIZE_T=4; 99 $g=""; 100} else { 101 $SIZE_T=8; 102 $g="g"; 103} 104 105$output and open STDOUT,">$output"; 106 107$softonly=0; # allow hardware support 108 109$t0="%r0"; $mask="%r0"; 110$t1="%r1"; 111$t2="%r2"; $inp="%r2"; 112$t3="%r3"; $out="%r3"; $bits="%r3"; 113$key="%r4"; 114$i1="%r5"; 115$i2="%r6"; 116$i3="%r7"; 117$s0="%r8"; 118$s1="%r9"; 119$s2="%r10"; 120$s3="%r11"; 121$tbl="%r12"; 122$rounds="%r13"; 123$ra="%r14"; 124$sp="%r15"; 125 126$stdframe=16*$SIZE_T+4*8; 127 128sub _data_word() 129{ my $i; 130 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; } 131} 132 133$code=<<___; 134#include "s390x_arch.h" 135 136.text 137 138.type AES_Te,\@object 139.align 256 140AES_Te: 141___ 142&_data_word( 143 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 144 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, 145 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, 146 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 147 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, 148 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, 149 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 150 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, 151 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, 152 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 
153 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, 154 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, 155 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 156 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, 157 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, 158 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 159 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, 160 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, 161 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 162 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, 163 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, 164 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 165 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, 166 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, 167 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 168 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, 169 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, 170 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 171 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, 172 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, 173 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 174 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, 175 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, 176 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 177 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, 178 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, 179 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 180 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, 181 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, 182 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 183 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, 184 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, 185 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 186 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, 187 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, 188 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 189 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, 190 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, 191 0x6fbabad5, 
0xf0787888, 0x4a25256f, 0x5c2e2e72, 192 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, 193 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, 194 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 195 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, 196 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, 197 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 198 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, 199 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, 200 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 201 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, 202 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, 203 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 204 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, 205 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, 206 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); 207$code.=<<___; 208# Te4[256] 209.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 210.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 211.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0 212.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 213.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc 214.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 215.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a 216.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 217.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0 218.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 219.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b 220.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf 221.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85 222.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 223.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5 224.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 225.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17 226.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 227.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88 228.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb 229.byte 0xe0, 0x32, 0x3a, 
0x0a, 0x49, 0x06, 0x24, 0x5c 230.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 231.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9 232.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 233.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6 234.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a 235.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e 236.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e 237.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94 238.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf 239.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68 240.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 241# rcon[] 242.long 0x01000000, 0x02000000, 0x04000000, 0x08000000 243.long 0x10000000, 0x20000000, 0x40000000, 0x80000000 244.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0 245.align 256 246.size AES_Te,.-AES_Te 247 248# void AES_encrypt(const unsigned char *inp, unsigned char *out, 249# const AES_KEY *key) { 250.globl AES_encrypt 251.type AES_encrypt,\@function 252AES_encrypt: 253___ 254$code.=<<___ if (!$softonly); 255 l %r0,240($key) 256 lhi %r1,16 257 clr %r0,%r1 258 jl .Lesoft 259 260 la %r1,0($key) 261 #la %r2,0($inp) 262 la %r4,0($out) 263 lghi %r3,16 # single block length 264 .long 0xb92e0042 # km %r4,%r2 265 brc 1,.-4 # can this happen? 
266 br %r14 267.align 64 268.Lesoft: 269___ 270$code.=<<___; 271 stm${g} %r3,$ra,3*$SIZE_T($sp) 272 273 llgf $s0,0($inp) 274 llgf $s1,4($inp) 275 llgf $s2,8($inp) 276 llgf $s3,12($inp) 277 278 larl $tbl,AES_Te 279 bras $ra,_s390x_AES_encrypt 280 281 l${g} $out,3*$SIZE_T($sp) 282 st $s0,0($out) 283 st $s1,4($out) 284 st $s2,8($out) 285 st $s3,12($out) 286 287 lm${g} %r6,$ra,6*$SIZE_T($sp) 288 br $ra 289.size AES_encrypt,.-AES_encrypt 290 291.type _s390x_AES_encrypt,\@function 292.align 16 293_s390x_AES_encrypt: 294 st${g} $ra,15*$SIZE_T($sp) 295 x $s0,0($key) 296 x $s1,4($key) 297 x $s2,8($key) 298 x $s3,12($key) 299 l $rounds,240($key) 300 llill $mask,`0xff<<3` 301 aghi $rounds,-1 302 j .Lenc_loop 303.align 16 304.Lenc_loop: 305 sllg $t1,$s0,`0+3` 306 srlg $t2,$s0,`8-3` 307 srlg $t3,$s0,`16-3` 308 srl $s0,`24-3` 309 nr $s0,$mask 310 ngr $t1,$mask 311 nr $t2,$mask 312 nr $t3,$mask 313 314 srlg $i1,$s1,`16-3` # i0 315 sllg $i2,$s1,`0+3` 316 srlg $i3,$s1,`8-3` 317 srl $s1,`24-3` 318 nr $i1,$mask 319 nr $s1,$mask 320 ngr $i2,$mask 321 nr $i3,$mask 322 323 l $s0,0($s0,$tbl) # Te0[s0>>24] 324 l $t1,1($t1,$tbl) # Te3[s0>>0] 325 l $t2,2($t2,$tbl) # Te2[s0>>8] 326 l $t3,3($t3,$tbl) # Te1[s0>>16] 327 328 x $s0,3($i1,$tbl) # Te1[s1>>16] 329 l $s1,0($s1,$tbl) # Te0[s1>>24] 330 x $t2,1($i2,$tbl) # Te3[s1>>0] 331 x $t3,2($i3,$tbl) # Te2[s1>>8] 332 333 srlg $i1,$s2,`8-3` # i0 334 srlg $i2,$s2,`16-3` # i1 335 nr $i1,$mask 336 nr $i2,$mask 337 sllg $i3,$s2,`0+3` 338 srl $s2,`24-3` 339 nr $s2,$mask 340 ngr $i3,$mask 341 342 xr $s1,$t1 343 srlg $ra,$s3,`8-3` # i1 344 sllg $t1,$s3,`0+3` # i0 345 nr $ra,$mask 346 la $key,16($key) 347 ngr $t1,$mask 348 349 x $s0,2($i1,$tbl) # Te2[s2>>8] 350 x $s1,3($i2,$tbl) # Te1[s2>>16] 351 l $s2,0($s2,$tbl) # Te0[s2>>24] 352 x $t3,1($i3,$tbl) # Te3[s2>>0] 353 354 srlg $i3,$s3,`16-3` # i2 355 xr $s2,$t2 356 srl $s3,`24-3` 357 nr $i3,$mask 358 nr $s3,$mask 359 360 x $s0,0($key) 361 x $s1,4($key) 362 x $s2,8($key) 363 x $t3,12($key) 364 365 x 
$s0,1($t1,$tbl) # Te3[s3>>0] 366 x $s1,2($ra,$tbl) # Te2[s3>>8] 367 x $s2,3($i3,$tbl) # Te1[s3>>16] 368 l $s3,0($s3,$tbl) # Te0[s3>>24] 369 xr $s3,$t3 370 371 brct $rounds,.Lenc_loop 372 .align 16 373 374 sllg $t1,$s0,`0+3` 375 srlg $t2,$s0,`8-3` 376 ngr $t1,$mask 377 srlg $t3,$s0,`16-3` 378 srl $s0,`24-3` 379 nr $s0,$mask 380 nr $t2,$mask 381 nr $t3,$mask 382 383 srlg $i1,$s1,`16-3` # i0 384 sllg $i2,$s1,`0+3` 385 ngr $i2,$mask 386 srlg $i3,$s1,`8-3` 387 srl $s1,`24-3` 388 nr $i1,$mask 389 nr $s1,$mask 390 nr $i3,$mask 391 392 llgc $s0,2($s0,$tbl) # Te4[s0>>24] 393 llgc $t1,2($t1,$tbl) # Te4[s0>>0] 394 sll $s0,24 395 llgc $t2,2($t2,$tbl) # Te4[s0>>8] 396 llgc $t3,2($t3,$tbl) # Te4[s0>>16] 397 sll $t2,8 398 sll $t3,16 399 400 llgc $i1,2($i1,$tbl) # Te4[s1>>16] 401 llgc $s1,2($s1,$tbl) # Te4[s1>>24] 402 llgc $i2,2($i2,$tbl) # Te4[s1>>0] 403 llgc $i3,2($i3,$tbl) # Te4[s1>>8] 404 sll $i1,16 405 sll $s1,24 406 sll $i3,8 407 or $s0,$i1 408 or $s1,$t1 409 or $t2,$i2 410 or $t3,$i3 411 412 srlg $i1,$s2,`8-3` # i0 413 srlg $i2,$s2,`16-3` # i1 414 nr $i1,$mask 415 nr $i2,$mask 416 sllg $i3,$s2,`0+3` 417 srl $s2,`24-3` 418 ngr $i3,$mask 419 nr $s2,$mask 420 421 sllg $t1,$s3,`0+3` # i0 422 srlg $ra,$s3,`8-3` # i1 423 ngr $t1,$mask 424 425 llgc $i1,2($i1,$tbl) # Te4[s2>>8] 426 llgc $i2,2($i2,$tbl) # Te4[s2>>16] 427 sll $i1,8 428 llgc $s2,2($s2,$tbl) # Te4[s2>>24] 429 llgc $i3,2($i3,$tbl) # Te4[s2>>0] 430 sll $i2,16 431 nr $ra,$mask 432 sll $s2,24 433 or $s0,$i1 434 or $s1,$i2 435 or $s2,$t2 436 or $t3,$i3 437 438 srlg $i3,$s3,`16-3` # i2 439 srl $s3,`24-3` 440 nr $i3,$mask 441 nr $s3,$mask 442 443 l $t0,16($key) 444 l $t2,20($key) 445 446 llgc $i1,2($t1,$tbl) # Te4[s3>>0] 447 llgc $i2,2($ra,$tbl) # Te4[s3>>8] 448 llgc $i3,2($i3,$tbl) # Te4[s3>>16] 449 llgc $s3,2($s3,$tbl) # Te4[s3>>24] 450 sll $i2,8 451 sll $i3,16 452 sll $s3,24 453 or $s0,$i1 454 or $s1,$i2 455 or $s2,$i3 456 or $s3,$t3 457 458 l${g} $ra,15*$SIZE_T($sp) 459 xr $s0,$t0 460 xr $s1,$t2 461 x $s2,24($key) 462 x 
$s3,28($key) 463 464 br $ra 465.size _s390x_AES_encrypt,.-_s390x_AES_encrypt 466___ 467 468$code.=<<___; 469.type AES_Td,\@object 470.align 256 471AES_Td: 472___ 473&_data_word( 474 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 475 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, 476 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, 477 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 478 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, 479 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, 480 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 481 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, 482 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, 483 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 484 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, 485 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, 486 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 487 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, 488 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, 489 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 490 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, 491 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, 492 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 493 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, 494 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, 495 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 496 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, 497 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, 498 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 499 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, 500 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, 501 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 502 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, 503 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, 504 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 505 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, 506 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, 507 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 508 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, 
509 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, 510 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 511 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, 512 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, 513 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 514 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, 515 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, 516 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 517 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, 518 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, 519 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 520 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, 521 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, 522 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 523 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, 524 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, 525 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 526 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, 527 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, 528 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 529 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, 530 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, 531 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 532 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, 533 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, 534 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 535 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, 536 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, 537 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); 538$code.=<<___; 539# Td4[256] 540.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 541.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb 542.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 543.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb 544.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d 545.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e 546.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 547.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 548.byte 
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 549.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 550.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda 551.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 552.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a 553.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 554.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 555.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b 556.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea 557.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 558.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 559.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e 560.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 561.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b 562.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 563.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 564.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 565.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f 566.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d 567.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef 568.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 569.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 570.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 571.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d 572.size AES_Td,.-AES_Td 573 574# void AES_decrypt(const unsigned char *inp, unsigned char *out, 575# const AES_KEY *key) { 576.globl AES_decrypt 577.type AES_decrypt,\@function 578AES_decrypt: 579___ 580$code.=<<___ if (!$softonly); 581 l %r0,240($key) 582 lhi %r1,16 583 clr %r0,%r1 584 jl .Ldsoft 585 586 la %r1,0($key) 587 #la %r2,0($inp) 588 la %r4,0($out) 589 lghi %r3,16 # single block length 590 .long 0xb92e0042 # km %r4,%r2 591 brc 1,.-4 # can this happen? 
592 br %r14 593.align 64 594.Ldsoft: 595___ 596$code.=<<___; 597 stm${g} %r3,$ra,3*$SIZE_T($sp) 598 599 llgf $s0,0($inp) 600 llgf $s1,4($inp) 601 llgf $s2,8($inp) 602 llgf $s3,12($inp) 603 604 larl $tbl,AES_Td 605 bras $ra,_s390x_AES_decrypt 606 607 l${g} $out,3*$SIZE_T($sp) 608 st $s0,0($out) 609 st $s1,4($out) 610 st $s2,8($out) 611 st $s3,12($out) 612 613 lm${g} %r6,$ra,6*$SIZE_T($sp) 614 br $ra 615.size AES_decrypt,.-AES_decrypt 616 617.type _s390x_AES_decrypt,\@function 618.align 16 619_s390x_AES_decrypt: 620 st${g} $ra,15*$SIZE_T($sp) 621 x $s0,0($key) 622 x $s1,4($key) 623 x $s2,8($key) 624 x $s3,12($key) 625 l $rounds,240($key) 626 llill $mask,`0xff<<3` 627 aghi $rounds,-1 628 j .Ldec_loop 629.align 16 630.Ldec_loop: 631 srlg $t1,$s0,`16-3` 632 srlg $t2,$s0,`8-3` 633 sllg $t3,$s0,`0+3` 634 srl $s0,`24-3` 635 nr $s0,$mask 636 nr $t1,$mask 637 nr $t2,$mask 638 ngr $t3,$mask 639 640 sllg $i1,$s1,`0+3` # i0 641 srlg $i2,$s1,`16-3` 642 srlg $i3,$s1,`8-3` 643 srl $s1,`24-3` 644 ngr $i1,$mask 645 nr $s1,$mask 646 nr $i2,$mask 647 nr $i3,$mask 648 649 l $s0,0($s0,$tbl) # Td0[s0>>24] 650 l $t1,3($t1,$tbl) # Td1[s0>>16] 651 l $t2,2($t2,$tbl) # Td2[s0>>8] 652 l $t3,1($t3,$tbl) # Td3[s0>>0] 653 654 x $s0,1($i1,$tbl) # Td3[s1>>0] 655 l $s1,0($s1,$tbl) # Td0[s1>>24] 656 x $t2,3($i2,$tbl) # Td1[s1>>16] 657 x $t3,2($i3,$tbl) # Td2[s1>>8] 658 659 srlg $i1,$s2,`8-3` # i0 660 sllg $i2,$s2,`0+3` # i1 661 srlg $i3,$s2,`16-3` 662 srl $s2,`24-3` 663 nr $i1,$mask 664 ngr $i2,$mask 665 nr $s2,$mask 666 nr $i3,$mask 667 668 xr $s1,$t1 669 srlg $ra,$s3,`8-3` # i1 670 srlg $t1,$s3,`16-3` # i0 671 nr $ra,$mask 672 la $key,16($key) 673 nr $t1,$mask 674 675 x $s0,2($i1,$tbl) # Td2[s2>>8] 676 x $s1,1($i2,$tbl) # Td3[s2>>0] 677 l $s2,0($s2,$tbl) # Td0[s2>>24] 678 x $t3,3($i3,$tbl) # Td1[s2>>16] 679 680 sllg $i3,$s3,`0+3` # i2 681 srl $s3,`24-3` 682 ngr $i3,$mask 683 nr $s3,$mask 684 685 xr $s2,$t2 686 x $s0,0($key) 687 x $s1,4($key) 688 x $s2,8($key) 689 x $t3,12($key) 690 691 x 
$s0,3($t1,$tbl) # Td1[s3>>16] 692 x $s1,2($ra,$tbl) # Td2[s3>>8] 693 x $s2,1($i3,$tbl) # Td3[s3>>0] 694 l $s3,0($s3,$tbl) # Td0[s3>>24] 695 xr $s3,$t3 696 697 brct $rounds,.Ldec_loop 698 .align 16 699 700 l $t1,`2048+0`($tbl) # prefetch Td4 701 l $t2,`2048+64`($tbl) 702 l $t3,`2048+128`($tbl) 703 l $i1,`2048+192`($tbl) 704 llill $mask,0xff 705 706 srlg $i3,$s0,24 # i0 707 srlg $t1,$s0,16 708 srlg $t2,$s0,8 709 nr $s0,$mask # i3 710 nr $t1,$mask 711 712 srlg $i1,$s1,24 713 nr $t2,$mask 714 srlg $i2,$s1,16 715 srlg $ra,$s1,8 716 nr $s1,$mask # i0 717 nr $i2,$mask 718 nr $ra,$mask 719 720 llgc $i3,2048($i3,$tbl) # Td4[s0>>24] 721 llgc $t1,2048($t1,$tbl) # Td4[s0>>16] 722 llgc $t2,2048($t2,$tbl) # Td4[s0>>8] 723 sll $t1,16 724 llgc $t3,2048($s0,$tbl) # Td4[s0>>0] 725 sllg $s0,$i3,24 726 sll $t2,8 727 728 llgc $s1,2048($s1,$tbl) # Td4[s1>>0] 729 llgc $i1,2048($i1,$tbl) # Td4[s1>>24] 730 llgc $i2,2048($i2,$tbl) # Td4[s1>>16] 731 sll $i1,24 732 llgc $i3,2048($ra,$tbl) # Td4[s1>>8] 733 sll $i2,16 734 sll $i3,8 735 or $s0,$s1 736 or $t1,$i1 737 or $t2,$i2 738 or $t3,$i3 739 740 srlg $i1,$s2,8 # i0 741 srlg $i2,$s2,24 742 srlg $i3,$s2,16 743 nr $s2,$mask # i1 744 nr $i1,$mask 745 nr $i3,$mask 746 llgc $i1,2048($i1,$tbl) # Td4[s2>>8] 747 llgc $s1,2048($s2,$tbl) # Td4[s2>>0] 748 llgc $i2,2048($i2,$tbl) # Td4[s2>>24] 749 llgc $i3,2048($i3,$tbl) # Td4[s2>>16] 750 sll $i1,8 751 sll $i2,24 752 or $s0,$i1 753 sll $i3,16 754 or $t2,$i2 755 or $t3,$i3 756 757 srlg $i1,$s3,16 # i0 758 srlg $i2,$s3,8 # i1 759 srlg $i3,$s3,24 760 nr $s3,$mask # i2 761 nr $i1,$mask 762 nr $i2,$mask 763 764 l${g} $ra,15*$SIZE_T($sp) 765 or $s1,$t1 766 l $t0,16($key) 767 l $t1,20($key) 768 769 llgc $i1,2048($i1,$tbl) # Td4[s3>>16] 770 llgc $i2,2048($i2,$tbl) # Td4[s3>>8] 771 sll $i1,16 772 llgc $s2,2048($s3,$tbl) # Td4[s3>>0] 773 llgc $s3,2048($i3,$tbl) # Td4[s3>>24] 774 sll $i2,8 775 sll $s3,24 776 or $s0,$i1 777 or $s1,$i2 778 or $s2,$t2 779 or $s3,$t3 780 781 xr $s0,$t0 782 xr $s1,$t1 783 x $s2,24($key) 
784 x $s3,28($key) 785 786 br $ra 787.size _s390x_AES_decrypt,.-_s390x_AES_decrypt 788___ 789 790$code.=<<___; 791# void AES_set_encrypt_key(const unsigned char *in, int bits, 792# AES_KEY *key) { 793.globl AES_set_encrypt_key 794.type AES_set_encrypt_key,\@function 795.align 16 796AES_set_encrypt_key: 797_s390x_AES_set_encrypt_key: 798 lghi $t0,0 799 cl${g}r $inp,$t0 800 je .Lminus1 801 cl${g}r $key,$t0 802 je .Lminus1 803 804 lghi $t0,128 805 clr $bits,$t0 806 je .Lproceed 807 lghi $t0,192 808 clr $bits,$t0 809 je .Lproceed 810 lghi $t0,256 811 clr $bits,$t0 812 je .Lproceed 813 lghi %r2,-2 814 br %r14 815 816.align 16 817.Lproceed: 818___ 819$code.=<<___ if (!$softonly); 820 # convert bits to km(c) code, [128,192,256]->[18,19,20] 821 lhi %r5,-128 822 lhi %r0,18 823 ar %r5,$bits 824 srl %r5,6 825 ar %r5,%r0 826 827 larl %r1,OPENSSL_s390xcap_P 828 llihh %r0,0x8000 829 srlg %r0,%r0,0(%r5) 830 ng %r0,S390X_KM(%r1) # check availability of both km... 831 ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length 832 jz .Lekey_internal 833 834 lmg %r0,%r1,0($inp) # just copy 128 bits... 
835 stmg %r0,%r1,0($key) 836 lhi %r0,192 837 cr $bits,%r0 838 jl 1f 839 lg %r1,16($inp) 840 stg %r1,16($key) 841 je 1f 842 lg %r1,24($inp) 843 stg %r1,24($key) 8441: st $bits,236($key) # save bits [for debugging purposes] 845 lgr $t0,%r5 846 st %r5,240($key) # save km(c) code 847 lghi %r2,0 848 br %r14 849___ 850$code.=<<___; 851.align 16 852.Lekey_internal: 853 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key 854 855 larl $tbl,AES_Te+2048 856 857 llgf $s0,0($inp) 858 llgf $s1,4($inp) 859 llgf $s2,8($inp) 860 llgf $s3,12($inp) 861 st $s0,0($key) 862 st $s1,4($key) 863 st $s2,8($key) 864 st $s3,12($key) 865 lghi $t0,128 866 cr $bits,$t0 867 jne .Lnot128 868 869 llill $mask,0xff 870 lghi $t3,0 # i=0 871 lghi $rounds,10 872 st $rounds,240($key) 873 874 llgfr $t2,$s3 # temp=rk[3] 875 srlg $i1,$s3,8 876 srlg $i2,$s3,16 877 srlg $i3,$s3,24 878 nr $t2,$mask 879 nr $i1,$mask 880 nr $i2,$mask 881 882.align 16 883.L128_loop: 884 la $t2,0($t2,$tbl) 885 la $i1,0($i1,$tbl) 886 la $i2,0($i2,$tbl) 887 la $i3,0($i3,$tbl) 888 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8 889 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16 890 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24 891 icm $t2,1,0($i3) # Te4[rk[3]>>24] 892 x $t2,256($t3,$tbl) # rcon[i] 893 xr $s0,$t2 # rk[4]=rk[0]^... 
894 xr $s1,$s0 # rk[5]=rk[1]^rk[4] 895 xr $s2,$s1 # rk[6]=rk[2]^rk[5] 896 xr $s3,$s2 # rk[7]=rk[3]^rk[6] 897 898 llgfr $t2,$s3 # temp=rk[3] 899 srlg $i1,$s3,8 900 srlg $i2,$s3,16 901 nr $t2,$mask 902 nr $i1,$mask 903 srlg $i3,$s3,24 904 nr $i2,$mask 905 906 st $s0,16($key) 907 st $s1,20($key) 908 st $s2,24($key) 909 st $s3,28($key) 910 la $key,16($key) # key+=4 911 la $t3,4($t3) # i++ 912 brct $rounds,.L128_loop 913 lghi $t0,10 914 lghi %r2,0 915 lm${g} %r4,%r13,4*$SIZE_T($sp) 916 br $ra 917 918.align 16 919.Lnot128: 920 llgf $t0,16($inp) 921 llgf $t1,20($inp) 922 st $t0,16($key) 923 st $t1,20($key) 924 lghi $t0,192 925 cr $bits,$t0 926 jne .Lnot192 927 928 llill $mask,0xff 929 lghi $t3,0 # i=0 930 lghi $rounds,12 931 st $rounds,240($key) 932 lghi $rounds,8 933 934 srlg $i1,$t1,8 935 srlg $i2,$t1,16 936 srlg $i3,$t1,24 937 nr $t1,$mask 938 nr $i1,$mask 939 nr $i2,$mask 940 941.align 16 942.L192_loop: 943 la $t1,0($t1,$tbl) 944 la $i1,0($i1,$tbl) 945 la $i2,0($i2,$tbl) 946 la $i3,0($i3,$tbl) 947 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8 948 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16 949 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24 950 icm $t1,1,0($i3) # Te4[rk[5]>>24] 951 x $t1,256($t3,$tbl) # rcon[i] 952 xr $s0,$t1 # rk[6]=rk[0]^... 
953 xr $s1,$s0 # rk[7]=rk[1]^rk[6] 954 xr $s2,$s1 # rk[8]=rk[2]^rk[7] 955 xr $s3,$s2 # rk[9]=rk[3]^rk[8] 956 957 st $s0,24($key) 958 st $s1,28($key) 959 st $s2,32($key) 960 st $s3,36($key) 961 brct $rounds,.L192_continue 962 lghi $t0,12 963 lghi %r2,0 964 lm${g} %r4,%r13,4*$SIZE_T($sp) 965 br $ra 966 967.align 16 968.L192_continue: 969 lgr $t1,$s3 970 x $t1,16($key) # rk[10]=rk[4]^rk[9] 971 st $t1,40($key) 972 x $t1,20($key) # rk[11]=rk[5]^rk[10] 973 st $t1,44($key) 974 975 srlg $i1,$t1,8 976 srlg $i2,$t1,16 977 srlg $i3,$t1,24 978 nr $t1,$mask 979 nr $i1,$mask 980 nr $i2,$mask 981 982 la $key,24($key) # key+=6 983 la $t3,4($t3) # i++ 984 j .L192_loop 985 986.align 16 987.Lnot192: 988 llgf $t0,24($inp) 989 llgf $t1,28($inp) 990 st $t0,24($key) 991 st $t1,28($key) 992 llill $mask,0xff 993 lghi $t3,0 # i=0 994 lghi $rounds,14 995 st $rounds,240($key) 996 lghi $rounds,7 997 998 srlg $i1,$t1,8 999 srlg $i2,$t1,16 1000 srlg $i3,$t1,24 1001 nr $t1,$mask 1002 nr $i1,$mask 1003 nr $i2,$mask 1004 1005.align 16 1006.L256_loop: 1007 la $t1,0($t1,$tbl) 1008 la $i1,0($i1,$tbl) 1009 la $i2,0($i2,$tbl) 1010 la $i3,0($i3,$tbl) 1011 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8 1012 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16 1013 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24 1014 icm $t1,1,0($i3) # Te4[rk[7]>>24] 1015 x $t1,256($t3,$tbl) # rcon[i] 1016 xr $s0,$t1 # rk[8]=rk[0]^... 
1017 xr $s1,$s0 # rk[9]=rk[1]^rk[8] 1018 xr $s2,$s1 # rk[10]=rk[2]^rk[9] 1019 xr $s3,$s2 # rk[11]=rk[3]^rk[10] 1020 st $s0,32($key) 1021 st $s1,36($key) 1022 st $s2,40($key) 1023 st $s3,44($key) 1024 brct $rounds,.L256_continue 1025 lghi $t0,14 1026 lghi %r2,0 1027 lm${g} %r4,%r13,4*$SIZE_T($sp) 1028 br $ra 1029 1030.align 16 1031.L256_continue: 1032 lgr $t1,$s3 # temp=rk[11] 1033 srlg $i1,$s3,8 1034 srlg $i2,$s3,16 1035 srlg $i3,$s3,24 1036 nr $t1,$mask 1037 nr $i1,$mask 1038 nr $i2,$mask 1039 la $t1,0($t1,$tbl) 1040 la $i1,0($i1,$tbl) 1041 la $i2,0($i2,$tbl) 1042 la $i3,0($i3,$tbl) 1043 llgc $t1,0($t1) # Te4[rk[11]>>0] 1044 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8 1045 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16 1046 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24 1047 x $t1,16($key) # rk[12]=rk[4]^... 1048 st $t1,48($key) 1049 x $t1,20($key) # rk[13]=rk[5]^rk[12] 1050 st $t1,52($key) 1051 x $t1,24($key) # rk[14]=rk[6]^rk[13] 1052 st $t1,56($key) 1053 x $t1,28($key) # rk[15]=rk[7]^rk[14] 1054 st $t1,60($key) 1055 1056 srlg $i1,$t1,8 1057 srlg $i2,$t1,16 1058 srlg $i3,$t1,24 1059 nr $t1,$mask 1060 nr $i1,$mask 1061 nr $i2,$mask 1062 1063 la $key,32($key) # key+=8 1064 la $t3,4($t3) # i++ 1065 j .L256_loop 1066 1067.Lminus1: 1068 lghi %r2,-1 1069 br $ra 1070.size AES_set_encrypt_key,.-AES_set_encrypt_key 1071 1072# void AES_set_decrypt_key(const unsigned char *in, int bits, 1073# AES_KEY *key) { 1074.globl AES_set_decrypt_key 1075.type AES_set_decrypt_key,\@function 1076.align 16 1077AES_set_decrypt_key: 1078 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to 1079 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key! 
1080 bras $ra,_s390x_AES_set_encrypt_key 1081 #l${g} $key,4*$SIZE_T($sp) 1082 l${g} $ra,14*$SIZE_T($sp) 1083 ltgr %r2,%r2 1084 bnzr $ra 1085___ 1086$code.=<<___ if (!$softonly); 1087 #l $t0,240($key) 1088 lhi $t1,16 1089 cr $t0,$t1 1090 jl .Lgo 1091 oill $t0,S390X_DECRYPT # set "decrypt" bit 1092 st $t0,240($key) 1093 br $ra 1094___ 1095$code.=<<___; 1096.align 16 1097.Lgo: lgr $rounds,$t0 #llgf $rounds,240($key) 1098 la $i1,0($key) 1099 sllg $i2,$rounds,4 1100 la $i2,0($i2,$key) 1101 srl $rounds,1 1102 lghi $t1,-16 1103 1104.align 16 1105.Linv: lmg $s0,$s1,0($i1) 1106 lmg $s2,$s3,0($i2) 1107 stmg $s0,$s1,0($i2) 1108 stmg $s2,$s3,0($i1) 1109 la $i1,16($i1) 1110 la $i2,0($t1,$i2) 1111 brct $rounds,.Linv 1112___ 1113$mask80=$i1; 1114$mask1b=$i2; 1115$maskfe=$i3; 1116$code.=<<___; 1117 llgf $rounds,240($key) 1118 aghi $rounds,-1 1119 sll $rounds,2 # (rounds-1)*4 1120 llilh $mask80,0x8080 1121 llilh $mask1b,0x1b1b 1122 llilh $maskfe,0xfefe 1123 oill $mask80,0x8080 1124 oill $mask1b,0x1b1b 1125 oill $maskfe,0xfefe 1126 1127.align 16 1128.Lmix: l $s0,16($key) # tp1 1129 lr $s1,$s0 1130 ngr $s1,$mask80 1131 srlg $t1,$s1,7 1132 slr $s1,$t1 1133 nr $s1,$mask1b 1134 sllg $t1,$s0,1 1135 nr $t1,$maskfe 1136 xr $s1,$t1 # tp2 1137 1138 lr $s2,$s1 1139 ngr $s2,$mask80 1140 srlg $t1,$s2,7 1141 slr $s2,$t1 1142 nr $s2,$mask1b 1143 sllg $t1,$s1,1 1144 nr $t1,$maskfe 1145 xr $s2,$t1 # tp4 1146 1147 lr $s3,$s2 1148 ngr $s3,$mask80 1149 srlg $t1,$s3,7 1150 slr $s3,$t1 1151 nr $s3,$mask1b 1152 sllg $t1,$s2,1 1153 nr $t1,$maskfe 1154 xr $s3,$t1 # tp8 1155 1156 xr $s1,$s0 # tp2^tp1 1157 xr $s2,$s0 # tp4^tp1 1158 rll $s0,$s0,24 # = ROTATE(tp1,8) 1159 xr $s2,$s3 # ^=tp8 1160 xr $s0,$s1 # ^=tp2^tp1 1161 xr $s1,$s3 # tp2^tp1^tp8 1162 xr $s0,$s2 # ^=tp4^tp1^tp8 1163 rll $s1,$s1,8 1164 rll $s2,$s2,16 1165 xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24) 1166 rll $s3,$s3,24 1167 xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16) 1168 xr $s0,$s3 # ^= ROTATE(tp8,8) 1169 1170 st $s0,16($key) 1171 la $key,4($key) 1172 
brct $rounds,.Lmix 1173 1174 lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key! 1175 lghi %r2,0 1176 br $ra 1177.size AES_set_decrypt_key,.-AES_set_decrypt_key 1178___ 1179 1180######################################################################## 1181# void AES_cbc_encrypt(const unsigned char *in, unsigned char *out, 1182# size_t length, const AES_KEY *key, 1183# unsigned char *ivec, const int enc) 1184{ 1185my $inp="%r2"; 1186my $out="%r4"; # length and out are swapped 1187my $len="%r3"; 1188my $key="%r5"; 1189my $ivp="%r6"; 1190 1191$code.=<<___; 1192.globl AES_cbc_encrypt 1193.type AES_cbc_encrypt,\@function 1194.align 16 1195AES_cbc_encrypt: 1196 xgr %r3,%r4 # flip %r3 and %r4, out and len 1197 xgr %r4,%r3 1198 xgr %r3,%r4 1199___ 1200$code.=<<___ if (!$softonly); 1201 lhi %r0,16 1202 cl %r0,240($key) 1203 jh .Lcbc_software 1204 1205 lg %r0,0($ivp) # copy ivec 1206 lg %r1,8($ivp) 1207 stmg %r0,%r1,16($sp) 1208 lmg %r0,%r1,0($key) # copy key, cover 256 bit 1209 stmg %r0,%r1,32($sp) 1210 lmg %r0,%r1,16($key) 1211 stmg %r0,%r1,48($sp) 1212 l %r0,240($key) # load kmc code 1213 lghi $key,15 # res=len%16, len-=res; 1214 ngr $key,$len 1215 sl${g}r $len,$key 1216 la %r1,16($sp) # parameter block - ivec || key 1217 jz .Lkmc_truncated 1218 .long 0xb92f0042 # kmc %r4,%r2 1219 brc 1,.-4 # pay attention to "partial completion" 1220 ltr $key,$key 1221 jnz .Lkmc_truncated 1222.Lkmc_done: 1223 lmg %r0,%r1,16($sp) # copy ivec to caller 1224 stg %r0,0($ivp) 1225 stg %r1,8($ivp) 1226 br $ra 1227.align 16 1228.Lkmc_truncated: 1229 ahi $key,-1 # it's the way it's encoded in mvc 1230 tmll %r0,S390X_DECRYPT 1231 jnz .Lkmc_truncated_dec 1232 lghi %r1,0 1233 stg %r1,16*$SIZE_T($sp) 1234 stg %r1,16*$SIZE_T+8($sp) 1235 bras %r1,1f 1236 mvc 16*$SIZE_T(1,$sp),0($inp) 12371: ex $key,0(%r1) 1238 la %r1,16($sp) # restore parameter block 1239 la $inp,16*$SIZE_T($sp) 1240 lghi $len,16 1241 .long 0xb92f0042 # kmc %r4,%r2 1242 j .Lkmc_done 1243.align 16 
1244.Lkmc_truncated_dec: 1245 st${g} $out,4*$SIZE_T($sp) 1246 la $out,16*$SIZE_T($sp) 1247 lghi $len,16 1248 .long 0xb92f0042 # kmc %r4,%r2 1249 l${g} $out,4*$SIZE_T($sp) 1250 bras %r1,2f 1251 mvc 0(1,$out),16*$SIZE_T($sp) 12522: ex $key,0(%r1) 1253 j .Lkmc_done 1254.align 16 1255.Lcbc_software: 1256___ 1257$code.=<<___; 1258 stm${g} $key,$ra,5*$SIZE_T($sp) 1259 lhi %r0,0 1260 cl %r0,`$stdframe+$SIZE_T-4`($sp) 1261 je .Lcbc_decrypt 1262 1263 larl $tbl,AES_Te 1264 1265 llgf $s0,0($ivp) 1266 llgf $s1,4($ivp) 1267 llgf $s2,8($ivp) 1268 llgf $s3,12($ivp) 1269 1270 lghi $t0,16 1271 sl${g}r $len,$t0 1272 brc 4,.Lcbc_enc_tail # if borrow 1273.Lcbc_enc_loop: 1274 stm${g} $inp,$out,2*$SIZE_T($sp) 1275 x $s0,0($inp) 1276 x $s1,4($inp) 1277 x $s2,8($inp) 1278 x $s3,12($inp) 1279 lgr %r4,$key 1280 1281 bras $ra,_s390x_AES_encrypt 1282 1283 lm${g} $inp,$key,2*$SIZE_T($sp) 1284 st $s0,0($out) 1285 st $s1,4($out) 1286 st $s2,8($out) 1287 st $s3,12($out) 1288 1289 la $inp,16($inp) 1290 la $out,16($out) 1291 lghi $t0,16 1292 lt${g}r $len,$len 1293 jz .Lcbc_enc_done 1294 sl${g}r $len,$t0 1295 brc 4,.Lcbc_enc_tail # if borrow 1296 j .Lcbc_enc_loop 1297.align 16 1298.Lcbc_enc_done: 1299 l${g} $ivp,6*$SIZE_T($sp) 1300 st $s0,0($ivp) 1301 st $s1,4($ivp) 1302 st $s2,8($ivp) 1303 st $s3,12($ivp) 1304 1305 lm${g} %r7,$ra,7*$SIZE_T($sp) 1306 br $ra 1307 1308.align 16 1309.Lcbc_enc_tail: 1310 aghi $len,15 1311 lghi $t0,0 1312 stg $t0,16*$SIZE_T($sp) 1313 stg $t0,16*$SIZE_T+8($sp) 1314 bras $t1,3f 1315 mvc 16*$SIZE_T(1,$sp),0($inp) 13163: ex $len,0($t1) 1317 lghi $len,0 1318 la $inp,16*$SIZE_T($sp) 1319 j .Lcbc_enc_loop 1320 1321.align 16 1322.Lcbc_decrypt: 1323 larl $tbl,AES_Td 1324 1325 lg $t0,0($ivp) 1326 lg $t1,8($ivp) 1327 stmg $t0,$t1,16*$SIZE_T($sp) 1328 1329.Lcbc_dec_loop: 1330 stm${g} $inp,$out,2*$SIZE_T($sp) 1331 llgf $s0,0($inp) 1332 llgf $s1,4($inp) 1333 llgf $s2,8($inp) 1334 llgf $s3,12($inp) 1335 lgr %r4,$key 1336 1337 bras $ra,_s390x_AES_decrypt 1338 1339 lm${g} 
$inp,$key,2*$SIZE_T($sp) 1340 sllg $s0,$s0,32 1341 sllg $s2,$s2,32 1342 lr $s0,$s1 1343 lr $s2,$s3 1344 1345 lg $t0,0($inp) 1346 lg $t1,8($inp) 1347 xg $s0,16*$SIZE_T($sp) 1348 xg $s2,16*$SIZE_T+8($sp) 1349 lghi $s1,16 1350 sl${g}r $len,$s1 1351 brc 4,.Lcbc_dec_tail # if borrow 1352 brc 2,.Lcbc_dec_done # if zero 1353 stg $s0,0($out) 1354 stg $s2,8($out) 1355 stmg $t0,$t1,16*$SIZE_T($sp) 1356 1357 la $inp,16($inp) 1358 la $out,16($out) 1359 j .Lcbc_dec_loop 1360 1361.Lcbc_dec_done: 1362 stg $s0,0($out) 1363 stg $s2,8($out) 1364.Lcbc_dec_exit: 1365 lm${g} %r6,$ra,6*$SIZE_T($sp) 1366 stmg $t0,$t1,0($ivp) 1367 1368 br $ra 1369 1370.align 16 1371.Lcbc_dec_tail: 1372 aghi $len,15 1373 stg $s0,16*$SIZE_T($sp) 1374 stg $s2,16*$SIZE_T+8($sp) 1375 bras $s1,4f 1376 mvc 0(1,$out),16*$SIZE_T($sp) 13774: ex $len,0($s1) 1378 j .Lcbc_dec_exit 1379.size AES_cbc_encrypt,.-AES_cbc_encrypt 1380___ 1381} 1382######################################################################## 1383# void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, 1384# size_t blocks, const AES_KEY *key, 1385# const unsigned char *ivec) 1386{ 1387my $inp="%r2"; 1388my $out="%r4"; # blocks and out are swapped 1389my $len="%r3"; 1390my $key="%r5"; my $iv0="%r5"; 1391my $ivp="%r6"; 1392my $fp ="%r7"; 1393 1394$code.=<<___; 1395.globl AES_ctr32_encrypt 1396.type AES_ctr32_encrypt,\@function 1397.align 16 1398AES_ctr32_encrypt: 1399 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1400 xgr %r4,%r3 1401 xgr %r3,%r4 1402 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case 1403___ 1404$code.=<<___ if (!$softonly); 1405 l %r0,240($key) 1406 lhi %r1,16 1407 clr %r0,%r1 1408 jl .Lctr32_software 1409 1410 st${g} $s2,10*$SIZE_T($sp) 1411 st${g} $s3,11*$SIZE_T($sp) 1412 1413 clr $len,%r1 # does work even in 64-bit mode 1414 jle .Lctr32_nokma # kma is slower for <= 16 blocks 1415 1416 larl %r1,OPENSSL_s390xcap_P 1417 lr $s2,%r0 1418 llihh $s3,0x8000 1419 srlg $s3,$s3,0($s2) 1420 ng $s3,S390X_KMA(%r1) # 
check kma capability vector 1421 jz .Lctr32_nokma 1422 1423 l${g}hi %r1,-$stdframe-112 1424 l${g}r $s3,$sp 1425 la $sp,0(%r1,$sp) # prepare parameter block 1426 1427 lhi %r1,0x0600 1428 sllg $len,$len,4 1429 or %r0,%r1 # set HS and LAAD flags 1430 1431 st${g} $s3,0($sp) # backchain 1432 la %r1,$stdframe($sp) 1433 1434 lmg $s2,$s3,0($key) # copy key 1435 stg $s2,$stdframe+80($sp) 1436 stg $s3,$stdframe+88($sp) 1437 lmg $s2,$s3,16($key) 1438 stg $s2,$stdframe+96($sp) 1439 stg $s3,$stdframe+104($sp) 1440 1441 lmg $s2,$s3,0($ivp) # copy iv 1442 stg $s2,$stdframe+64($sp) 1443 ahi $s3,-1 # kma requires counter-1 1444 stg $s3,$stdframe+72($sp) 1445 st $s3,$stdframe+12($sp) # copy counter 1446 1447 lghi $s2,0 # no AAD 1448 lghi $s3,0 1449 1450 .long 0xb929a042 # kma $out,$s2,$inp 1451 brc 1,.-4 # pay attention to "partial completion" 1452 1453 stg %r0,$stdframe+80($sp) # wipe key 1454 stg %r0,$stdframe+88($sp) 1455 stg %r0,$stdframe+96($sp) 1456 stg %r0,$stdframe+104($sp) 1457 la $sp,$stdframe+112($sp) 1458 1459 lm${g} $s2,$s3,10*$SIZE_T($sp) 1460 br $ra 1461 1462.align 16 1463.Lctr32_nokma: 1464 stm${g} %r6,$s1,6*$SIZE_T($sp) 1465 1466 slgr $out,$inp 1467 la %r1,0($key) # %r1 is permanent copy of $key 1468 lg $iv0,0($ivp) # load ivec 1469 lg $ivp,8($ivp) 1470 1471 # prepare and allocate stack frame at the top of 4K page 1472 # with 1K reserved for eventual signal handling 1473 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer 1474 lghi $s1,-4096 1475 algr $s0,$sp 1476 lgr $fp,$sp 1477 ngr $s0,$s1 # align at page boundary 1478 slgr $fp,$s0 # total buffer size 1479 lgr $s2,$sp 1480 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility 1481 slgr $fp,$s1 # deduct reservation to get usable buffer size 1482 # buffer size is at least 256 and at most 3072+256-16 1483 1484 la $sp,1024($s0) # alloca 1485 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16 1486 st${g} $s2,0($sp) # back-chain 1487 st${g} $fp,$SIZE_T($sp) 1488 1489 slgr $len,$fp 1490 brc
1,.Lctr32_hw_switch # not zero, no borrow 1491 algr $fp,$len # input is shorter than allocated buffer 1492 lghi $len,0 1493 st${g} $fp,$SIZE_T($sp) 1494 1495.Lctr32_hw_switch: 1496___ 1497$code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower 1498 llgfr $s0,%r0 1499 lgr $s1,%r1 1500 larl %r1,OPENSSL_s390xcap_P 1501 llihh %r0,0x8000 # check if kmctr supports the function code 1502 srlg %r0,%r0,0($s0) 1503 ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector 1504 lgr %r0,$s0 1505 lgr %r1,$s1 1506 jz .Lctr32_km_loop 1507 1508####### kmctr code 1509 algr $out,$inp # restore $out 1510 lgr $s1,$len # $s1 undertakes $len 1511 j .Lctr32_kmctr_loop 1512.align 16 1513.Lctr32_kmctr_loop: 1514 la $s2,16($sp) 1515 lgr $s3,$fp 1516.Lctr32_kmctr_prepare: 1517 stg $iv0,0($s2) 1518 stg $ivp,8($s2) 1519 la $s2,16($s2) 1520 ahi $ivp,1 # 32-bit increment, preserves upper half 1521 brct $s3,.Lctr32_kmctr_prepare 1522 1523 #la $inp,0($inp) # inp 1524 sllg $len,$fp,4 # len 1525 #la $out,0($out) # out 1526 la $s2,16($sp) # iv 1527 .long 0xb92da042 # kmctr $out,$s2,$inp 1528 brc 1,.-4 # pay attention to "partial completion" 1529 1530 slgr $s1,$fp 1531 brc 1,.Lctr32_kmctr_loop # not zero, no borrow 1532 algr $fp,$s1 1533 lghi $s1,0 1534 brc 4+1,.Lctr32_kmctr_loop # not zero 1535 1536 l${g} $sp,0($sp) 1537 lm${g} %r6,$s3,6*$SIZE_T($sp) 1538 br $ra 1539.align 16 1540___ 1541$code.=<<___ if (!$softonly); 1542.Lctr32_km_loop: 1543 la $s2,16($sp) 1544 lgr $s3,$fp 1545.Lctr32_km_prepare: 1546 stg $iv0,0($s2) 1547 stg $ivp,8($s2) 1548 la $s2,16($s2) 1549 ahi $ivp,1 # 32-bit increment, preserves upper half 1550 brct $s3,.Lctr32_km_prepare 1551 1552 la $s0,16($sp) # inp 1553 sllg $s1,$fp,4 # len 1554 la $s2,16($sp) # out 1555 .long 0xb92e00a8 # km %r10,%r8 1556 brc 1,.-4 # pay attention to "partial completion" 1557 1558 la $s2,16($sp) 1559 lgr $s3,$fp 1560 slgr $s2,$inp 1561.Lctr32_km_xor: 1562 lg $s0,0($inp) 1563 lg $s1,8($inp) 1564 xg $s0,0($s2,$inp) 1565 xg 
$s1,8($s2,$inp) 1566 stg $s0,0($out,$inp) 1567 stg $s1,8($out,$inp) 1568 la $inp,16($inp) 1569 brct $s3,.Lctr32_km_xor 1570 1571 slgr $len,$fp 1572 brc 1,.Lctr32_km_loop # not zero, no borrow 1573 algr $fp,$len 1574 lghi $len,0 1575 brc 4+1,.Lctr32_km_loop # not zero 1576 1577 l${g} $s0,0($sp) 1578 l${g} $s1,$SIZE_T($sp) 1579 la $s2,16($sp) 1580.Lctr32_km_zap: 1581 stg $s0,0($s2) 1582 stg $s0,8($s2) 1583 la $s2,16($s2) 1584 brct $s1,.Lctr32_km_zap 1585 1586 la $sp,0($s0) 1587 lm${g} %r6,$s3,6*$SIZE_T($sp) 1588 br $ra 1589.align 16 1590.Lctr32_software: 1591___ 1592$code.=<<___; 1593 stm${g} $key,$ra,5*$SIZE_T($sp) 1594 sl${g}r $inp,$out 1595 larl $tbl,AES_Te 1596 llgf $t1,12($ivp) 1597 1598.Lctr32_loop: 1599 stm${g} $inp,$out,2*$SIZE_T($sp) 1600 llgf $s0,0($ivp) 1601 llgf $s1,4($ivp) 1602 llgf $s2,8($ivp) 1603 lgr $s3,$t1 1604 st $t1,16*$SIZE_T($sp) 1605 lgr %r4,$key 1606 1607 bras $ra,_s390x_AES_encrypt 1608 1609 lm${g} $inp,$ivp,2*$SIZE_T($sp) 1610 llgf $t1,16*$SIZE_T($sp) 1611 x $s0,0($inp,$out) 1612 x $s1,4($inp,$out) 1613 x $s2,8($inp,$out) 1614 x $s3,12($inp,$out) 1615 stm $s0,$s3,0($out) 1616 1617 la $out,16($out) 1618 ahi $t1,1 # 32-bit increment 1619 brct $len,.Lctr32_loop 1620 1621 lm${g} %r6,$ra,6*$SIZE_T($sp) 1622 br $ra 1623.size AES_ctr32_encrypt,.-AES_ctr32_encrypt 1624___ 1625} 1626 1627######################################################################## 1628# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out, 1629# size_t len, const AES_KEY *key1, const AES_KEY *key2, 1630# const unsigned char iv[16]); 1631# 1632{ 1633my $inp="%r2"; 1634my $out="%r4"; # len and out are swapped 1635my $len="%r3"; 1636my $key1="%r5"; # $i1 1637my $key2="%r6"; # $i2 1638my $fp="%r7"; # $i3 1639my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame... 
1640 1641$code.=<<___; 1642.type _s390x_xts_km,\@function 1643.align 16 1644_s390x_xts_km: 1645___ 1646$code.=<<___ if(1); 1647 llgfr $s0,%r0 # put aside the function code 1648 lghi $s1,0x7f 1649 nr $s1,%r0 1650 larl %r1,OPENSSL_s390xcap_P 1651 llihh %r0,0x8000 1652 srlg %r0,%r0,32($s1) # check for 32+function code 1653 ng %r0,S390X_KM(%r1) # check km capability vector 1654 lgr %r0,$s0 # restore the function code 1655 la %r1,0($key1) # restore $key1 1656 jz .Lxts_km_vanilla 1657 1658 lmg $i2,$i3,$tweak($sp) # put aside the tweak value 1659 algr $out,$inp 1660 1661 oill %r0,32 # switch to xts function code 1662 aghi $s1,-18 # 1663 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16 1664 la %r1,$tweak-16($sp) 1665 slgr %r1,$s1 # parameter block position 1666 lmg $s0,$s3,0($key1) # load 256 bits of key material, 1667 stmg $s0,$s3,0(%r1) # and copy it to parameter block. 1668 # yes, it contains junk and overlaps 1669 # with the tweak in 128-bit case. 1670 # it's done to avoid conditional 1671 # branch. 
1672 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value 1673 1674 .long 0xb92e0042 # km %r4,%r2 1675 brc 1,.-4 # pay attention to "partial completion" 1676 1677 lrvg $s0,$tweak+0($sp) # load the last tweak 1678 lrvg $s1,$tweak+8($sp) 1679 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key 1680 1681 nill %r0,0xffdf # switch back to original function code 1682 la %r1,0($key1) # restore pointer to $key1 1683 slgr $out,$inp 1684 1685 llgc $len,2*$SIZE_T-1($sp) 1686 nill $len,0x0f # $len%=16 1687 br $ra 1688 1689.align 16 1690.Lxts_km_vanilla: 1691___ 1692$code.=<<___; 1693 # prepare and allocate stack frame at the top of 4K page 1694 # with 1K reserved for eventual signal handling 1695 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer 1696 lghi $s1,-4096 1697 algr $s0,$sp 1698 lgr $fp,$sp 1699 ngr $s0,$s1 # align at page boundary 1700 slgr $fp,$s0 # total buffer size 1701 lgr $s2,$sp 1702 lghi $s1,1024+16 # sl[g]fi is extended-immediate facility 1703 slgr $fp,$s1 # deduct reservation to get usable buffer size 1704 # buffer size is at least 256 and at most 3072+256-16 1705 1706 la $sp,1024($s0) # alloca 1707 nill $fp,0xfff0 # round to 16*n 1708 st${g} $s2,0($sp) # back-chain 1709 nill $len,0xfff0 # redundant 1710 st${g} $fp,$SIZE_T($sp) 1711 1712 slgr $len,$fp 1713 brc 1,.Lxts_km_go # not zero, no borrow 1714 algr $fp,$len # input is shorter than allocated buffer 1715 lghi $len,0 1716 st${g} $fp,$SIZE_T($sp) 1717 1718.Lxts_km_go: 1719 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian 1720 lrvg $s1,$tweak+8($s2) 1721 1722 la $s2,16($sp) # vector of ascending tweak values 1723 slgr $s2,$inp 1724 srlg $s3,$fp,4 1725 j .Lxts_km_start 1726 1727.Lxts_km_loop: 1728 la $s2,16($sp) 1729 slgr $s2,$inp 1730 srlg $s3,$fp,4 1731.Lxts_km_prepare: 1732 lghi $i1,0x87 1733 srag $i2,$s1,63 # broadcast upper bit 1734 ngr $i1,$i2 # rem 1735 algr $s0,$s0 1736 alcgr $s1,$s1 1737 xgr $s0,$i1 1738.Lxts_km_start: 1739 lrvgr $i1,$s0 # flip byte order 1740 lrvgr $i2,$s1
1741 stg $i1,0($s2,$inp) 1742 stg $i2,8($s2,$inp) 1743 xg $i1,0($inp) 1744 xg $i2,8($inp) 1745 stg $i1,0($out,$inp) 1746 stg $i2,8($out,$inp) 1747 la $inp,16($inp) 1748 brct $s3,.Lxts_km_prepare 1749 1750 slgr $inp,$fp # rewind $inp 1751 la $s2,0($out,$inp) 1752 lgr $s3,$fp 1753 .long 0xb92e00aa # km $s2,$s2 1754 brc 1,.-4 # pay attention to "partial completion" 1755 1756 la $s2,16($sp) 1757 slgr $s2,$inp 1758 srlg $s3,$fp,4 1759.Lxts_km_xor: 1760 lg $i1,0($out,$inp) 1761 lg $i2,8($out,$inp) 1762 xg $i1,0($s2,$inp) 1763 xg $i2,8($s2,$inp) 1764 stg $i1,0($out,$inp) 1765 stg $i2,8($out,$inp) 1766 la $inp,16($inp) 1767 brct $s3,.Lxts_km_xor 1768 1769 slgr $len,$fp 1770 brc 1,.Lxts_km_loop # not zero, no borrow 1771 algr $fp,$len 1772 lghi $len,0 1773 brc 4+1,.Lxts_km_loop # not zero 1774 1775 l${g} $i1,0($sp) # back-chain 1776 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used 1777 la $i2,16($sp) 1778 srlg $fp,$fp,4 1779.Lxts_km_zap: 1780 stg $i1,0($i2) 1781 stg $i1,8($i2) 1782 la $i2,16($i2) 1783 brct $fp,.Lxts_km_zap 1784 1785 la $sp,0($i1) 1786 llgc $len,2*$SIZE_T-1($i1) 1787 nill $len,0x0f # $len%=16 1788 bzr $ra 1789 1790 # generate one more tweak... 1791 lghi $i1,0x87 1792 srag $i2,$s1,63 # broadcast upper bit 1793 ngr $i1,$i2 # rem 1794 algr $s0,$s0 1795 alcgr $s1,$s1 1796 xgr $s0,$i1 1797 1798 ltr $len,$len # clear zero flag 1799 br $ra 1800.size _s390x_xts_km,.-_s390x_xts_km 1801 1802.globl AES_xts_encrypt 1803.type AES_xts_encrypt,\@function 1804.align 16 1805AES_xts_encrypt: 1806 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 1807 xgr %r4,%r3 1808 xgr %r3,%r4 1809___ 1810$code.=<<___ if ($SIZE_T==4); 1811 llgfr $len,$len 1812___ 1813$code.=<<___; 1814 st${g} $len,1*$SIZE_T($sp) # save copy of $len 1815 srag $len,$len,4 # formally wrong, because it expands 1816 # sign byte, but who can afford asking 1817 # to process more than 2^63-1 bytes? 1818 # I use it, because it sets condition 1819 # code... 1820 bcr 8,$ra # abort if zero (i.e. 
less than 16) 1821___ 1822$code.=<<___ if (!$softonly); 1823 llgf %r0,240($key2) 1824 lhi %r1,16 1825 clr %r0,%r1 1826 jl .Lxts_enc_software 1827 1828 st${g} $ra,5*$SIZE_T($sp) 1829 stm${g} %r6,$s3,6*$SIZE_T($sp) 1830 1831 sllg $len,$len,4 # $len&=~15 1832 slgr $out,$inp 1833 1834 # generate the tweak value 1835 l${g} $s3,$stdframe($sp) # pointer to iv 1836 la $s2,$tweak($sp) 1837 lmg $s0,$s1,0($s3) 1838 lghi $s3,16 1839 stmg $s0,$s1,0($s2) 1840 la %r1,0($key2) # $key2 is not needed anymore 1841 .long 0xb92e00aa # km $s2,$s2, generate the tweak 1842 brc 1,.-4 # can this happen? 1843 1844 l %r0,240($key1) 1845 la %r1,0($key1) # $key1 is not needed anymore 1846 bras $ra,_s390x_xts_km 1847 jz .Lxts_enc_km_done 1848 1849 aghi $inp,-16 # take one step back 1850 la $i3,0($out,$inp) # put aside real $out 1851.Lxts_enc_km_steal: 1852 llgc $i1,16($inp) 1853 llgc $i2,0($out,$inp) 1854 stc $i1,0($out,$inp) 1855 stc $i2,16($out,$inp) 1856 la $inp,1($inp) 1857 brct $len,.Lxts_enc_km_steal 1858 1859 la $s2,0($i3) 1860 lghi $s3,16 1861 lrvgr $i1,$s0 # flip byte order 1862 lrvgr $i2,$s1 1863 xg $i1,0($s2) 1864 xg $i2,8($s2) 1865 stg $i1,0($s2) 1866 stg $i2,8($s2) 1867 .long 0xb92e00aa # km $s2,$s2 1868 brc 1,.-4 # can this happen? 
1869 lrvgr $i1,$s0 # flip byte order 1870 lrvgr $i2,$s1 1871 xg $i1,0($i3) 1872 xg $i2,8($i3) 1873 stg $i1,0($i3) 1874 stg $i2,8($i3) 1875 1876.Lxts_enc_km_done: 1877 stg $sp,$tweak+0($sp) # wipe tweak 1878 stg $sp,$tweak+8($sp) 1879 l${g} $ra,5*$SIZE_T($sp) 1880 lm${g} %r6,$s3,6*$SIZE_T($sp) 1881 br $ra 1882.align 16 1883.Lxts_enc_software: 1884___ 1885$code.=<<___; 1886 stm${g} %r6,$ra,6*$SIZE_T($sp) 1887 1888 slgr $out,$inp 1889 1890 l${g} $s3,$stdframe($sp) # ivp 1891 llgf $s0,0($s3) # load iv 1892 llgf $s1,4($s3) 1893 llgf $s2,8($s3) 1894 llgf $s3,12($s3) 1895 stm${g} %r2,%r5,2*$SIZE_T($sp) 1896 la $key,0($key2) 1897 larl $tbl,AES_Te 1898 bras $ra,_s390x_AES_encrypt # generate the tweak 1899 lm${g} %r2,%r5,2*$SIZE_T($sp) 1900 stm $s0,$s3,$tweak($sp) # save the tweak 1901 j .Lxts_enc_enter 1902 1903.align 16 1904.Lxts_enc_loop: 1905 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1906 lrvg $s3,$tweak+8($sp) 1907 lghi %r1,0x87 1908 srag %r0,$s3,63 # broadcast upper bit 1909 ngr %r1,%r0 # rem 1910 algr $s1,$s1 1911 alcgr $s3,$s3 1912 xgr $s1,%r1 1913 lrvgr $s1,$s1 # flip byte order 1914 lrvgr $s3,$s3 1915 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1916 stg $s1,$tweak+0($sp) # save the tweak 1917 llgfr $s1,$s1 1918 srlg $s2,$s3,32 1919 stg $s3,$tweak+8($sp) 1920 llgfr $s3,$s3 1921 la $inp,16($inp) # $inp+=16 1922.Lxts_enc_enter: 1923 x $s0,0($inp) # ^=*($inp) 1924 x $s1,4($inp) 1925 x $s2,8($inp) 1926 x $s3,12($inp) 1927 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 1928 la $key,0($key1) 1929 bras $ra,_s390x_AES_encrypt 1930 lm${g} %r2,%r5,2*$SIZE_T($sp) 1931 x $s0,$tweak+0($sp) # ^=tweak 1932 x $s1,$tweak+4($sp) 1933 x $s2,$tweak+8($sp) 1934 x $s3,$tweak+12($sp) 1935 st $s0,0($out,$inp) 1936 st $s1,4($out,$inp) 1937 st $s2,8($out,$inp) 1938 st $s3,12($out,$inp) 1939 brct${g} $len,.Lxts_enc_loop 1940 1941 llgc $len,`2*$SIZE_T-1`($sp) 1942 nill $len,0x0f # $len%16 1943 jz .Lxts_enc_done 1944 1945 la $i3,0($inp,$out) # put aside real 
$out 1946.Lxts_enc_steal: 1947 llgc %r0,16($inp) 1948 llgc %r1,0($out,$inp) 1949 stc %r0,0($out,$inp) 1950 stc %r1,16($out,$inp) 1951 la $inp,1($inp) 1952 brct $len,.Lxts_enc_steal 1953 la $out,0($i3) # restore real $out 1954 1955 # generate last tweak... 1956 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 1957 lrvg $s3,$tweak+8($sp) 1958 lghi %r1,0x87 1959 srag %r0,$s3,63 # broadcast upper bit 1960 ngr %r1,%r0 # rem 1961 algr $s1,$s1 1962 alcgr $s3,$s3 1963 xgr $s1,%r1 1964 lrvgr $s1,$s1 # flip byte order 1965 lrvgr $s3,$s3 1966 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 1967 stg $s1,$tweak+0($sp) # save the tweak 1968 llgfr $s1,$s1 1969 srlg $s2,$s3,32 1970 stg $s3,$tweak+8($sp) 1971 llgfr $s3,$s3 1972 1973 x $s0,0($out) # ^=*(inp)|stolen cipher-text 1974 x $s1,4($out) 1975 x $s2,8($out) 1976 x $s3,12($out) 1977 st${g} $out,4*$SIZE_T($sp) 1978 la $key,0($key1) 1979 bras $ra,_s390x_AES_encrypt 1980 l${g} $out,4*$SIZE_T($sp) 1981 x $s0,`$tweak+0`($sp) # ^=tweak 1982 x $s1,`$tweak+4`($sp) 1983 x $s2,`$tweak+8`($sp) 1984 x $s3,`$tweak+12`($sp) 1985 st $s0,0($out) 1986 st $s1,4($out) 1987 st $s2,8($out) 1988 st $s3,12($out) 1989 1990.Lxts_enc_done: 1991 stg $sp,$tweak+0($sp) # wipe tweak 1992 stg $sp,$tweak+8($sp) 1993 lm${g} %r6,$ra,6*$SIZE_T($sp) 1994 br $ra 1995.size AES_xts_encrypt,.-AES_xts_encrypt 1996___ 1997# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out, 1998# size_t len, const AES_KEY *key1, const AES_KEY *key2, 1999# const unsigned char iv[16]); 2000# 2001$code.=<<___; 2002.globl AES_xts_decrypt 2003.type AES_xts_decrypt,\@function 2004.align 16 2005AES_xts_decrypt: 2006 xgr %r3,%r4 # flip %r3 and %r4, $out and $len 2007 xgr %r4,%r3 2008 xgr %r3,%r4 2009___ 2010$code.=<<___ if ($SIZE_T==4); 2011 llgfr $len,$len 2012___ 2013$code.=<<___; 2014 st${g} $len,1*$SIZE_T($sp) # save copy of $len 2015 aghi $len,-16 2016 bcr 4,$ra # abort if less than zero.
formally 2017 # wrong, because $len is unsigned, 2018 # but who can afford asking to 2019 # process more than 2^63-1 bytes? 2020 tmll $len,0x0f 2021 jnz .Lxts_dec_proceed 2022 aghi $len,16 2023.Lxts_dec_proceed: 2024___ 2025$code.=<<___ if (!$softonly); 2026 llgf %r0,240($key2) 2027 lhi %r1,16 2028 clr %r0,%r1 2029 jl .Lxts_dec_software 2030 2031 st${g} $ra,5*$SIZE_T($sp) 2032 stm${g} %r6,$s3,6*$SIZE_T($sp) 2033 2034 nill $len,0xfff0 # $len&=~15 2035 slgr $out,$inp 2036 2037 # generate the tweak value 2038 l${g} $s3,$stdframe($sp) # pointer to iv 2039 la $s2,$tweak($sp) 2040 lmg $s0,$s1,0($s3) 2041 lghi $s3,16 2042 stmg $s0,$s1,0($s2) 2043 la %r1,0($key2) # $key2 is not needed past this point 2044 .long 0xb92e00aa # km $s2,$s2, generate the tweak 2045 brc 1,.-4 # can this happen? 2046 2047 l %r0,240($key1) 2048 la %r1,0($key1) # $key1 is not needed anymore 2049 2050 ltgr $len,$len 2051 jz .Lxts_dec_km_short 2052 bras $ra,_s390x_xts_km 2053 jz .Lxts_dec_km_done 2054 2055 lrvgr $s2,$s0 # make copy in reverse byte order 2056 lrvgr $s3,$s1 2057 j .Lxts_dec_km_2ndtweak 2058 2059.Lxts_dec_km_short: 2060 llgc $len,`2*$SIZE_T-1`($sp) 2061 nill $len,0x0f # $len%=16 2062 lrvg $s0,$tweak+0($sp) # load the tweak 2063 lrvg $s1,$tweak+8($sp) 2064 lrvgr $s2,$s0 # make copy in reverse byte order 2065 lrvgr $s3,$s1 2066 2067.Lxts_dec_km_2ndtweak: 2068 lghi $i1,0x87 2069 srag $i2,$s1,63 # broadcast upper bit 2070 ngr $i1,$i2 # rem 2071 algr $s0,$s0 2072 alcgr $s1,$s1 2073 xgr $s0,$i1 2074 lrvgr $i1,$s0 # flip byte order 2075 lrvgr $i2,$s1 2076 2077 xg $i1,0($inp) 2078 xg $i2,8($inp) 2079 stg $i1,0($out,$inp) 2080 stg $i2,8($out,$inp) 2081 la $i2,0($out,$inp) 2082 lghi $i3,16 2083 .long 0xb92e0066 # km $i2,$i2 2084 brc 1,.-4 # can this happen? 
2085 lrvgr $i1,$s0 2086 lrvgr $i2,$s1 2087 xg $i1,0($out,$inp) 2088 xg $i2,8($out,$inp) 2089 stg $i1,0($out,$inp) 2090 stg $i2,8($out,$inp) 2091 2092 la $i3,0($out,$inp) # put aside real $out 2093.Lxts_dec_km_steal: 2094 llgc $i1,16($inp) 2095 llgc $i2,0($out,$inp) 2096 stc $i1,0($out,$inp) 2097 stc $i2,16($out,$inp) 2098 la $inp,1($inp) 2099 brct $len,.Lxts_dec_km_steal 2100 2101 lgr $s0,$s2 2102 lgr $s1,$s3 2103 xg $s0,0($i3) 2104 xg $s1,8($i3) 2105 stg $s0,0($i3) 2106 stg $s1,8($i3) 2107 la $s0,0($i3) 2108 lghi $s1,16 2109 .long 0xb92e0088 # km $s0,$s0 2110 brc 1,.-4 # can this happen? 2111 xg $s2,0($i3) 2112 xg $s3,8($i3) 2113 stg $s2,0($i3) 2114 stg $s3,8($i3) 2115.Lxts_dec_km_done: 2116 stg $sp,$tweak+0($sp) # wipe tweak 2117 stg $sp,$tweak+8($sp) 2118 l${g} $ra,5*$SIZE_T($sp) 2119 lm${g} %r6,$s3,6*$SIZE_T($sp) 2120 br $ra 2121.align 16 2122.Lxts_dec_software: 2123___ 2124$code.=<<___; 2125 stm${g} %r6,$ra,6*$SIZE_T($sp) 2126 2127 srlg $len,$len,4 2128 slgr $out,$inp 2129 2130 l${g} $s3,$stdframe($sp) # ivp 2131 llgf $s0,0($s3) # load iv 2132 llgf $s1,4($s3) 2133 llgf $s2,8($s3) 2134 llgf $s3,12($s3) 2135 stm${g} %r2,%r5,2*$SIZE_T($sp) 2136 la $key,0($key2) 2137 larl $tbl,AES_Te 2138 bras $ra,_s390x_AES_encrypt # generate the tweak 2139 lm${g} %r2,%r5,2*$SIZE_T($sp) 2140 larl $tbl,AES_Td 2141 lt${g}r $len,$len 2142 stm $s0,$s3,$tweak($sp) # save the tweak 2143 jz .Lxts_dec_short 2144 j .Lxts_dec_enter 2145 2146.align 16 2147.Lxts_dec_loop: 2148 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2149 lrvg $s3,$tweak+8($sp) 2150 lghi %r1,0x87 2151 srag %r0,$s3,63 # broadcast upper bit 2152 ngr %r1,%r0 # rem 2153 algr $s1,$s1 2154 alcgr $s3,$s3 2155 xgr $s1,%r1 2156 lrvgr $s1,$s1 # flip byte order 2157 lrvgr $s3,$s3 2158 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2159 stg $s1,$tweak+0($sp) # save the tweak 2160 llgfr $s1,$s1 2161 srlg $s2,$s3,32 2162 stg $s3,$tweak+8($sp) 2163 llgfr $s3,$s3 2164.Lxts_dec_enter: 2165 x $s0,0($inp) # tweak^=*(inp) 2166 x 
$s1,4($inp) 2167 x $s2,8($inp) 2168 x $s3,12($inp) 2169 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing 2170 la $key,0($key1) 2171 bras $ra,_s390x_AES_decrypt 2172 lm${g} %r2,%r5,2*$SIZE_T($sp) 2173 x $s0,$tweak+0($sp) # ^=tweak 2174 x $s1,$tweak+4($sp) 2175 x $s2,$tweak+8($sp) 2176 x $s3,$tweak+12($sp) 2177 st $s0,0($out,$inp) 2178 st $s1,4($out,$inp) 2179 st $s2,8($out,$inp) 2180 st $s3,12($out,$inp) 2181 la $inp,16($inp) 2182 brct${g} $len,.Lxts_dec_loop 2183 2184 llgc $len,`2*$SIZE_T-1`($sp) 2185 nill $len,0x0f # $len%16 2186 jz .Lxts_dec_done 2187 2188 # generate pair of tweaks... 2189 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2190 lrvg $s3,$tweak+8($sp) 2191 lghi %r1,0x87 2192 srag %r0,$s3,63 # broadcast upper bit 2193 ngr %r1,%r0 # rem 2194 algr $s1,$s1 2195 alcgr $s3,$s3 2196 xgr $s1,%r1 2197 lrvgr $i2,$s1 # flip byte order 2198 lrvgr $i3,$s3 2199 stmg $i2,$i3,$tweak($sp) # save the 1st tweak 2200 j .Lxts_dec_2ndtweak 2201 2202.align 16 2203.Lxts_dec_short: 2204 llgc $len,`2*$SIZE_T-1`($sp) 2205 nill $len,0x0f # $len%16 2206 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian 2207 lrvg $s3,$tweak+8($sp) 2208.Lxts_dec_2ndtweak: 2209 lghi %r1,0x87 2210 srag %r0,$s3,63 # broadcast upper bit 2211 ngr %r1,%r0 # rem 2212 algr $s1,$s1 2213 alcgr $s3,$s3 2214 xgr $s1,%r1 2215 lrvgr $s1,$s1 # flip byte order 2216 lrvgr $s3,$s3 2217 srlg $s0,$s1,32 # smash the tweak to 4x32-bits 2218 stg $s1,$tweak-16+0($sp) # save the 2nd tweak 2219 llgfr $s1,$s1 2220 srlg $s2,$s3,32 2221 stg $s3,$tweak-16+8($sp) 2222 llgfr $s3,$s3 2223 2224 x $s0,0($inp) # tweak_the_2nd^=*(inp) 2225 x $s1,4($inp) 2226 x $s2,8($inp) 2227 x $s3,12($inp) 2228 stm${g} %r2,%r3,2*$SIZE_T($sp) 2229 la $key,0($key1) 2230 bras $ra,_s390x_AES_decrypt 2231 lm${g} %r2,%r5,2*$SIZE_T($sp) 2232 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd 2233 x $s1,$tweak-16+4($sp) 2234 x $s2,$tweak-16+8($sp) 2235 x $s3,$tweak-16+12($sp) 2236 st $s0,0($out,$inp) 2237 st $s1,4($out,$inp) 2238 st 
$s2,8($out,$inp) 2239 st $s3,12($out,$inp) 2240 2241 la $i3,0($out,$inp) # put aside real $out 2242.Lxts_dec_steal: 2243 llgc %r0,16($inp) 2244 llgc %r1,0($out,$inp) 2245 stc %r0,0($out,$inp) 2246 stc %r1,16($out,$inp) 2247 la $inp,1($inp) 2248 brct $len,.Lxts_dec_steal 2249 la $out,0($i3) # restore real $out 2250 2251 lm $s0,$s3,$tweak($sp) # load the 1st tweak 2252 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text 2253 x $s1,4($out) 2254 x $s2,8($out) 2255 x $s3,12($out) 2256 st${g} $out,4*$SIZE_T($sp) 2257 la $key,0($key1) 2258 bras $ra,_s390x_AES_decrypt 2259 l${g} $out,4*$SIZE_T($sp) 2260 x $s0,$tweak+0($sp) # ^=tweak 2261 x $s1,$tweak+4($sp) 2262 x $s2,$tweak+8($sp) 2263 x $s3,$tweak+12($sp) 2264 st $s0,0($out) 2265 st $s1,4($out) 2266 st $s2,8($out) 2267 st $s3,12($out) 2268 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak 2269 stg $sp,$tweak-16+8($sp) 2270.Lxts_dec_done: 2271 stg $sp,$tweak+0($sp) # wipe tweak 2272 stg $sp,$tweak+8($sp) 2273 lm${g} %r6,$ra,6*$SIZE_T($sp) 2274 br $ra 2275.size AES_xts_decrypt,.-AES_xts_decrypt 2276___ 2277} 2278$code.=<<___; 2279.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" 2280___ 2281 2282$code =~ s/\`([^\`]*)\`/eval $1/gem; 2283print $code; 2284close STDOUT or die "error closing STDOUT: $!"; # force flush 2285