#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# Needs more work: key setup, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.

# February 2010
#
# Rescheduling instructions to favour the Power6 pipeline gave 10%
# performance improvement on the platform in question (and marginal
# improvement even on others). It should be noted that Power6 fails
# to process a byte in 18 cycles, only in 23, because it fails to issue
# 4 load instructions in two cycles, only in 3. As a result non-compact
# block subroutines are 25% slower than one would expect. Compact
# functions scale better, because they have pure computational part,
# which scales perfectly with clock frequency. To be specific
# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Pick the pointer size and the matching store-with-update / load / store
# mnemonics from the flavour string; anything without "64" or "32" is fatal.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# Non-zero when the flavour name ends in "le"; selects the byte-swapping
# load/store sequences in AES_encrypt/AES_decrypt below.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Directory part of the script path; empty when $0 carries no separator,
# so the lookups below are then relative to the current directory.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# NOTE(review): the command is a single shell string built from @ARGV
# values; presumably only the build system supplies them - confirm.
open STDOUT,"| $^X $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";

# Size in bytes of the stack frame set up by AES_encrypt/AES_decrypt.
$FRAME=32*$SIZE_T;

# Append every argument to $code as a ".long" directive holding the same
# 32-bit value twice, i.e. each table word occupies 8 bytes in the output.
sub _data_word
{
	for my $word (@_) {
		$code.=sprintf"\t.long\t0x%08x,0x%08x\n",$word,$word;
	}
}

# Symbolic register names interpolated into the assembly templates.
$sp="r1";
$toc="r2";
$inp="r3";
$out="r4";
$key="r5";

$Tbl0="r3";
$Tbl1="r6";
$Tbl2="r7";
$Tbl3=$out;	# stay away from "r2"; $out is offloaded to stack

$s0="r8";
$s1="r9";
$s2="r10";
$s3="r11";

$t0="r12";
$t1="r0";	# stay away from "r13";
$t2="r14";
$t3="r15";

$acc00="r16";
$acc01="r17";
$acc02="r18";
$acc03="r19";

$acc04="r20";
$acc05="r21";
$acc06="r22";
$acc07="r23";

$acc08="r24";
$acc09="r25";
$acc10="r26";
$acc11="r27";

$acc12="r28";
$acc13="r29";
$acc14="r30";
$acc15="r31";

# The compact routines reuse two table-pointer registers for bit masks.
$mask80=$Tbl2;
$mask1b=$Tbl3;

# LAES_Te / LAES_Td: small stubs that obtain their own address through
# "bcl 20,31,$+4" + mflr and return it in $Tbl0 after adding a constant
# (annotated in the template as the distance to the first data entry).
# The caller's LR is preserved in r0 across the stub.
$code.=<<___;
.machine	"any"
.text

.align	7
LAES_Te:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-8`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`64-9*4`
LAES_Td:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$Tbl0	;    vvvvvvvv "distance" between . and 1st data entry
	addi	$Tbl0,$Tbl0,`128-64-8+2048+256`
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
	.space	`128-64-9*4`
___
# Encryption word table: 256 values, each emitted twice (2048 bytes).
_data_word(
	0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
	0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
	0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
	0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
	0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
	0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
	0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
	0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
	0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
	0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
	0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
	0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
	0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
	0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
	0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
	0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
	0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
	0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
	0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
	0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
	0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
	0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
	0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
	0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
	0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
	0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
	0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
	0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
	0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
	0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
	0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
	0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
	0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
	0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
	0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
	0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
	0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
	0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
	0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
	0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
	0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
	0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
	0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
	0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
	0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
	0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
	0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
	0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
	0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
	0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
	0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
	0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
	0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
	0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
	0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
	0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
	0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
	0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
	0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
	0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
	0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
	0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
	0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
	0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
# 256-byte table that follows the word table, i.e. it starts 2048 bytes
# past the address LAES_Te returns (the code below calls it Te4).
$code.=<<___;
.byte	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
___
# Decryption word table: 256 values, each emitted twice (2048 bytes).
# LAES_Td's offset skips the 2048+256 bytes of encryption tables above.
_data_word(
	0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
	0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
	0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
	0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
	0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
	0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
	0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
	0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
	0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
	0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
	0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
	0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
	0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
	0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
	0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
	0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
	0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
	0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
	0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
	0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
	0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
	0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
	0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
	0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
	0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
	0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
	0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
	0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
	0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
	0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
	0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
	0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
	0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
	0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
	0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
	0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
	0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
	0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
	0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
	0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
	0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
	0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
	0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
	0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
	0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
	0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
	0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
	0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
	0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
	0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
	0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
	0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
	0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
	0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
	0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
	0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
	0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
	0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
	0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
	0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
	0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
	0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
	0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
	0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# 256-byte table following the decryption word table (called Td4 below).
$code.=<<___;
.byte	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
___
# .AES_encrypt: reads 16 bytes at $inp, runs Lppc_AES_encrypt_compact with
# the key material at $key and stores 16 bytes at $out.  $out is spilled
# to the frame because its register doubles as $Tbl3 / $mask1b.
# Prologue: allocate the frame, save r14-r31 and LR, then branch to
# Lenc_unaligned when either pointer is not 4-byte aligned.
$code.=<<___;


.globl	.AES_encrypt
.align	7
.AES_encrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Lenc_unaligned

Lenc_unaligned_ok:
___
# Word-wise input load; the little-endian variant additionally reverses
# the byte order of each word with rotlwi/rlwimi.
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$t0,0($inp)
	lwz	$t1,4($inp)
	lwz	$t2,8($inp)
	lwz	$t3,12($inp)
	rotlwi	$s0,$t0,8
	rotlwi	$s1,$t1,8
	rotlwi	$s2,$t2,8
	rotlwi	$s3,$t3,8
	rlwimi	$s0,$t0,24,0,7
	rlwimi	$s1,$t1,24,0,7
	rlwimi	$s2,$t2,24,0,7
	rlwimi	$s3,$t3,24,0,7
	rlwimi	$s0,$t0,24,16,23
	rlwimi	$s1,$t1,24,16,23
	rlwimi	$s2,$t2,24,16,23
	rlwimi	$s3,$t3,24,16,23
___
$code.=<<___;
	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
___
$code.=<<___ if ($LITTLE_ENDIAN);
	rotlwi	$t0,$s0,8
	rotlwi	$t1,$s1,8
	rotlwi	$t2,$s2,8
	rotlwi	$t3,$s3,8
	rlwimi	$t0,$s0,24,0,7
	rlwimi	$t1,$s1,24,0,7
	rlwimi	$t2,$s2,24,0,7
	rlwimi	$t3,$s3,24,0,7
	rlwimi	$t0,$s0,24,16,23
	rlwimi	$t1,$s1,24,16,23
	rlwimi	$t2,$s2,24,16,23
	rlwimi	$t3,$s3,24,16,23
	stw	$t0,0($out)
	stw	$t1,4($out)
	stw	$t2,8($out)
	stw	$t3,12($out)
___
$code.=<<___ if (!$LITTLE_ENDIAN);
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
___
# Unaligned path.  NOTE(review): the subfic/andi. pair looks like a test
# for the 16-byte block straddling a 4096-byte boundary; when neither
# pointer does, control returns to the word-wise path, otherwise the
# block is moved byte by byte (Lenc_xpage).  Lenc_done is the common
# epilogue restoring LR and r14-r31.
$code.=<<___;
	b	Lenc_done

Lenc_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Lenc_xpage
	andi.	$t1,$t1,4096-16
	bne	Lenc_unaligned_ok

Lenc_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Te
	bl	Lppc_AES_encrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Lenc_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___
# Lppc_AES_encrypt: table-driven variant (not called from the code visible
# in this part of the file).  Expects the state in $s0-$s3, the table base
# in $Tbl0 and the key at $key; the word at offset 240 of the key, minus
# one, is the loop count.  $Tbl1-$Tbl3 point 3, 2 and 1 bytes into the
# doubled word table, and indices are scaled by 8 (the "+3" shifts).
# The last round uses byte lookups in the table at $Tbl0+2048.
$code.=<<___;

.align	5
Lppc_AES_encrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Lenc_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s1,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s2,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s3,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s0,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz	Lenc_loop

	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Te4
	rlwinm	$acc04,$s1,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s2,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	rlwinm	$acc06,$s3,`32-16`,24,31
	lwz	$acc11,`2048+96`($Tbl0)
	rlwinm	$acc07,$s0,`32-16`,24,31
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc10,$s0,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc00,$Tbl2,$acc00
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc01,$Tbl2,$acc01
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc08,$Tbl2,$acc08
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc09,$Tbl2,$acc09
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc10,$Tbl2,$acc10
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc11,$Tbl2,$acc11
	rlwimi	$s3,$acc07,16,8,15
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc08,8,16,23
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc09,8,16,23
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc10,8,16,23
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
# Lppc_AES_encrypt_compact: the variant AES_encrypt actually calls.  Every
# round uses only byte lookups in the 256-byte table at $Tbl0+2048; the
# column mixing is computed arithmetically with the 0x80808080 and
# 0x1b1b1b1b masks (see the inline formulas).  The word at offset 240 of
# the key is the loop count; the final pass leaves through
# Lenc_compact_done, which applies the last round key.
$code.=<<___;

.align	4
Lppc_AES_encrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
	mtctr	$acc00
.align	4
Lenc_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s1,`32-16`,24,31
	rlwinm	$acc05,$s2,`32-16`,24,31
	rlwinm	$acc06,$s3,`32-16`,24,31
	rlwinm	$acc07,$s0,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s3,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s0,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s1,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s2,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl1,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl1,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl1,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl1,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl1,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	lwz	$t0,0($key)
	or	$s0,$s0,$acc12
	lwz	$t1,4($key)
	or	$s1,$s1,$acc13
	lwz	$t2,8($key)
	or	$s2,$s2,$acc14
	lwz	$t3,12($key)
	or	$s3,$s3,$acc15

	addi	$key,$key,16
	bdz	Lenc_compact_done

	and	$acc00,$s0,$mask80	# r1=r0&0x80808080
	and	$acc01,$s1,$mask80
	and	$acc02,$s2,$mask80
	and	$acc03,$s3,$mask80
	srwi	$acc04,$acc00,7		# r1>>7
	andc	$acc08,$s0,$mask80	# r0&0x7f7f7f7f
	srwi	$acc05,$acc01,7
	andc	$acc09,$s1,$mask80
	srwi	$acc06,$acc02,7
	andc	$acc10,$s2,$mask80
	srwi	$acc07,$acc03,7
	andc	$acc11,$s3,$mask80
	sub	$acc00,$acc00,$acc04	# r1-(r1>>7)
	sub	$acc01,$acc01,$acc05
	sub	$acc02,$acc02,$acc06
	sub	$acc03,$acc03,$acc07
	add	$acc08,$acc08,$acc08	# (r0&0x7f7f7f7f)<<1
	add	$acc09,$acc09,$acc09
	add	$acc10,$acc10,$acc10
	add	$acc11,$acc11,$acc11
	and	$acc00,$acc00,$mask1b	# (r1-(r1>>7))&0x1b1b1b1b
	and	$acc01,$acc01,$mask1b
	and	$acc02,$acc02,$mask1b
	and	$acc03,$acc03,$mask1b
	xor	$acc00,$acc00,$acc08	# r2
	xor	$acc01,$acc01,$acc09
	 rotlwi	$acc12,$s0,16		# ROTATE(r0,16)
	xor	$acc02,$acc02,$acc10
	 rotlwi	$acc13,$s1,16
	xor	$acc03,$acc03,$acc11
	 rotlwi	$acc14,$s2,16

	xor	$s0,$s0,$acc00		# r0^r2
	rotlwi	$acc15,$s3,16
	xor	$s1,$s1,$acc01
	rotrwi	$s0,$s0,24		# ROTATE(r2^r0,24)
	xor	$s2,$s2,$acc02
	rotrwi	$s1,$s1,24
	xor	$s3,$s3,$acc03
	rotrwi	$s2,$s2,24
	xor	$s0,$s0,$acc00		# ROTATE(r2^r0,24)^r2
	rotrwi	$s3,$s3,24
	xor	$s1,$s1,$acc01
	xor	$s2,$s2,$acc02
	xor	$s3,$s3,$acc03
	rotlwi	$acc08,$acc12,8		# ROTATE(r0,24)
	xor	$s0,$s0,$acc12		#
	rotlwi	$acc09,$acc13,8
	xor	$s1,$s1,$acc13
	rotlwi	$acc10,$acc14,8
	xor	$s2,$s2,$acc14
	rotlwi	$acc11,$acc15,8
	xor	$s3,$s3,$acc15
	xor	$s0,$s0,$acc08		#
	xor	$s1,$s1,$acc09
	xor	$s2,$s2,$acc10
	xor	$s3,$s3,$acc11

	b	Lenc_compact_loop
.align	4
Lenc_compact_done:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.size	.AES_encrypt,.-.AES_encrypt
___
# .AES_decrypt: same frame layout and control flow as .AES_encrypt, but it
# obtains the table base from LAES_Td and calls Lppc_AES_decrypt_compact.
$code.=<<___;

.globl	.AES_decrypt
.align	7
.AES_decrypt:
	$STU	$sp,-$FRAME($sp)
	mflr	r0

	$PUSH	$out,`$FRAME-$SIZE_T*19`($sp)
	$PUSH	r14,`$FRAME-$SIZE_T*18`($sp)
	$PUSH	r15,`$FRAME-$SIZE_T*17`($sp)
	$PUSH	r16,`$FRAME-$SIZE_T*16`($sp)
	$PUSH	r17,`$FRAME-$SIZE_T*15`($sp)
	$PUSH	r18,`$FRAME-$SIZE_T*14`($sp)
	$PUSH	r19,`$FRAME-$SIZE_T*13`($sp)
	$PUSH	r20,`$FRAME-$SIZE_T*12`($sp)
	$PUSH	r21,`$FRAME-$SIZE_T*11`($sp)
	$PUSH	r22,`$FRAME-$SIZE_T*10`($sp)
	$PUSH	r23,`$FRAME-$SIZE_T*9`($sp)
	$PUSH	r24,`$FRAME-$SIZE_T*8`($sp)
	$PUSH	r25,`$FRAME-$SIZE_T*7`($sp)
	$PUSH	r26,`$FRAME-$SIZE_T*6`($sp)
	$PUSH	r27,`$FRAME-$SIZE_T*5`($sp)
	$PUSH	r28,`$FRAME-$SIZE_T*4`($sp)
	$PUSH	r29,`$FRAME-$SIZE_T*3`($sp)
	$PUSH	r30,`$FRAME-$SIZE_T*2`($sp)
	$PUSH	r31,`$FRAME-$SIZE_T*1`($sp)
	$PUSH	r0,`$FRAME+$LRSAVE`($sp)

	andi.	$t0,$inp,3
	andi.	$t1,$out,3
	or.	$t0,$t0,$t1
	bne	Ldec_unaligned

Ldec_unaligned_ok:
___
$code.=<<___ if (!$LITTLE_ENDIAN);
	lwz	$s0,0($inp)
	lwz	$s1,4($inp)
	lwz	$s2,8($inp)
	lwz	$s3,12($inp)
___
$code.=<<___ if ($LITTLE_ENDIAN);
	lwz	$t0,0($inp)
	lwz	$t1,4($inp)
	lwz	$t2,8($inp)
	lwz	$t3,12($inp)
	rotlwi	$s0,$t0,8
	rotlwi	$s1,$t1,8
	rotlwi	$s2,$t2,8
	rotlwi	$s3,$t3,8
	rlwimi	$s0,$t0,24,0,7
	rlwimi	$s1,$t1,24,0,7
	rlwimi	$s2,$t2,24,0,7
	rlwimi	$s3,$t3,24,0,7
	rlwimi	$s0,$t0,24,16,23
	rlwimi	$s1,$t1,24,16,23
	rlwimi	$s2,$t2,24,16,23
	rlwimi	$s3,$t3,24,16,23
___
$code.=<<___;
	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)
___
$code.=<<___ if ($LITTLE_ENDIAN);
	rotlwi	$t0,$s0,8
	rotlwi	$t1,$s1,8
	rotlwi	$t2,$s2,8
	rotlwi	$t3,$s3,8
	rlwimi	$t0,$s0,24,0,7
	rlwimi	$t1,$s1,24,0,7
	rlwimi	$t2,$s2,24,0,7
	rlwimi	$t3,$s3,24,0,7
	rlwimi	$t0,$s0,24,16,23
	rlwimi	$t1,$s1,24,16,23
	rlwimi	$t2,$s2,24,16,23
	rlwimi	$t3,$s3,24,16,23
	stw	$t0,0($out)
	stw	$t1,4($out)
	stw	$t2,8($out)
	stw	$t3,12($out)
___
$code.=<<___ if (!$LITTLE_ENDIAN);
	stw	$s0,0($out)
	stw	$s1,4($out)
	stw	$s2,8($out)
	stw	$s3,12($out)
___
# Unaligned / byte-wise path and common epilogue, mirroring the
# encryption side.
$code.=<<___;
	b	Ldec_done

Ldec_unaligned:
	subfic	$t0,$inp,4096
	subfic	$t1,$out,4096
	andi.	$t0,$t0,4096-16
	beq	Ldec_xpage
	andi.	$t1,$t1,4096-16
	bne	Ldec_unaligned_ok

Ldec_xpage:
	lbz	$acc00,0($inp)
	lbz	$acc01,1($inp)
	lbz	$acc02,2($inp)
	lbz	$s0,3($inp)
	lbz	$acc04,4($inp)
	lbz	$acc05,5($inp)
	lbz	$acc06,6($inp)
	lbz	$s1,7($inp)
	lbz	$acc08,8($inp)
	lbz	$acc09,9($inp)
	lbz	$acc10,10($inp)
	insrwi	$s0,$acc00,8,0
	lbz	$s2,11($inp)
	insrwi	$s1,$acc04,8,0
	lbz	$acc12,12($inp)
	insrwi	$s0,$acc01,8,8
	lbz	$acc13,13($inp)
	insrwi	$s1,$acc05,8,8
	lbz	$acc14,14($inp)
	insrwi	$s0,$acc02,8,16
	lbz	$s3,15($inp)
	insrwi	$s1,$acc06,8,16
	insrwi	$s2,$acc08,8,0
	insrwi	$s3,$acc12,8,0
	insrwi	$s2,$acc09,8,8
	insrwi	$s3,$acc13,8,8
	insrwi	$s2,$acc10,8,16
	insrwi	$s3,$acc14,8,16

	bl	LAES_Td
	bl	Lppc_AES_decrypt_compact
	$POP	$out,`$FRAME-$SIZE_T*19`($sp)

	extrwi	$acc00,$s0,8,0
	extrwi	$acc01,$s0,8,8
	stb	$acc00,0($out)
	extrwi	$acc02,$s0,8,16
	stb	$acc01,1($out)
	stb	$acc02,2($out)
	extrwi	$acc04,$s1,8,0
	stb	$s0,3($out)
	extrwi	$acc05,$s1,8,8
	stb	$acc04,4($out)
	extrwi	$acc06,$s1,8,16
	stb	$acc05,5($out)
	stb	$acc06,6($out)
	extrwi	$acc08,$s2,8,0
	stb	$s1,7($out)
	extrwi	$acc09,$s2,8,8
	stb	$acc08,8($out)
	extrwi	$acc10,$s2,8,16
	stb	$acc09,9($out)
	stb	$acc10,10($out)
	extrwi	$acc12,$s3,8,0
	stb	$s2,11($out)
	extrwi	$acc13,$s3,8,8
	stb	$acc12,12($out)
	extrwi	$acc14,$s3,8,16
	stb	$acc13,13($out)
	stb	$acc14,14($out)
	stb	$s3,15($out)

Ldec_done:
	$POP	r0,`$FRAME+$LRSAVE`($sp)
	$POP	r14,`$FRAME-$SIZE_T*18`($sp)
	$POP	r15,`$FRAME-$SIZE_T*17`($sp)
	$POP	r16,`$FRAME-$SIZE_T*16`($sp)
	$POP	r17,`$FRAME-$SIZE_T*15`($sp)
	$POP	r18,`$FRAME-$SIZE_T*14`($sp)
	$POP	r19,`$FRAME-$SIZE_T*13`($sp)
	$POP	r20,`$FRAME-$SIZE_T*12`($sp)
	$POP	r21,`$FRAME-$SIZE_T*11`($sp)
	$POP	r22,`$FRAME-$SIZE_T*10`($sp)
	$POP	r23,`$FRAME-$SIZE_T*9`($sp)
	$POP	r24,`$FRAME-$SIZE_T*8`($sp)
	$POP	r25,`$FRAME-$SIZE_T*7`($sp)
	$POP	r26,`$FRAME-$SIZE_T*6`($sp)
	$POP	r27,`$FRAME-$SIZE_T*5`($sp)
	$POP	r28,`$FRAME-$SIZE_T*4`($sp)
	$POP	r29,`$FRAME-$SIZE_T*3`($sp)
	$POP	r30,`$FRAME-$SIZE_T*2`($sp)
	$POP	r31,`$FRAME-$SIZE_T*1`($sp)
	mtlr	r0
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,18,3,0
	.long	0
___
# Lppc_AES_decrypt: table-driven decryption counterpart of
# Lppc_AES_encrypt; same register contract, but the state words feeding
# each lookup are picked in the opposite rotation order.
$code.=<<___;

.align	5
Lppc_AES_decrypt:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,3
	lwz	$t0,0($key)
	addi	$Tbl2,$Tbl0,2
	lwz	$t1,4($key)
	addi	$Tbl3,$Tbl0,1
	lwz	$t2,8($key)
	addi	$acc00,$acc00,-1
	lwz	$t3,12($key)
	addi	$key,$key,16
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	mtctr	$acc00
.align	4
Ldec_loop:
	rlwinm	$acc00,$s0,`32-24+3`,21,28
	rlwinm	$acc01,$s1,`32-24+3`,21,28
	rlwinm	$acc02,$s2,`32-24+3`,21,28
	rlwinm	$acc03,$s3,`32-24+3`,21,28
	lwz	$t0,0($key)
	rlwinm	$acc04,$s3,`32-16+3`,21,28
	lwz	$t1,4($key)
	rlwinm	$acc05,$s0,`32-16+3`,21,28
	lwz	$t2,8($key)
	rlwinm	$acc06,$s1,`32-16+3`,21,28
	lwz	$t3,12($key)
	rlwinm	$acc07,$s2,`32-16+3`,21,28
	lwzx	$acc00,$Tbl0,$acc00
	rlwinm	$acc08,$s2,`32-8+3`,21,28
	lwzx	$acc01,$Tbl0,$acc01
	rlwinm	$acc09,$s3,`32-8+3`,21,28
	lwzx	$acc02,$Tbl0,$acc02
	rlwinm	$acc10,$s0,`32-8+3`,21,28
	lwzx	$acc03,$Tbl0,$acc03
	rlwinm	$acc11,$s1,`32-8+3`,21,28
	lwzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0+3`,21,28
	lwzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0+3`,21,28
	lwzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0+3`,21,28
	lwzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0+3`,21,28
	lwzx	$acc08,$Tbl2,$acc08
	xor	$t0,$t0,$acc00
	lwzx	$acc09,$Tbl2,$acc09
	xor	$t1,$t1,$acc01
	lwzx	$acc10,$Tbl2,$acc10
	xor	$t2,$t2,$acc02
	lwzx	$acc11,$Tbl2,$acc11
	xor	$t3,$t3,$acc03
	lwzx	$acc12,$Tbl3,$acc12
	xor	$t0,$t0,$acc04
	lwzx	$acc13,$Tbl3,$acc13
	xor	$t1,$t1,$acc05
	lwzx	$acc14,$Tbl3,$acc14
	xor	$t2,$t2,$acc06
	lwzx	$acc15,$Tbl3,$acc15
	xor	$t3,$t3,$acc07
	xor	$t0,$t0,$acc08
	xor	$t1,$t1,$acc09
	xor	$t2,$t2,$acc10
	xor	$t3,$t3,$acc11
	xor	$s0,$t0,$acc12
	xor	$s1,$t1,$acc13
	xor	$s2,$t2,$acc14
	xor	$s3,$t3,$acc15
	addi	$key,$key,16
	bdnz	Ldec_loop

	addi	$Tbl2,$Tbl0,2048
	nop
	lwz	$t0,0($key)
	rlwinm	$acc00,$s0,`32-24`,24,31
	lwz	$t1,4($key)
	rlwinm	$acc01,$s1,`32-24`,24,31
	lwz	$t2,8($key)
	rlwinm	$acc02,$s2,`32-24`,24,31
	lwz	$t3,12($key)
	rlwinm	$acc03,$s3,`32-24`,24,31
	lwz	$acc08,`2048+0`($Tbl0)	! prefetch Td4
	rlwinm	$acc04,$s3,`32-16`,24,31
	lwz	$acc09,`2048+32`($Tbl0)
	rlwinm	$acc05,$s0,`32-16`,24,31
	lwz	$acc10,`2048+64`($Tbl0)
	lbzx	$acc00,$Tbl2,$acc00
	lwz	$acc11,`2048+96`($Tbl0)
	lbzx	$acc01,$Tbl2,$acc01
	lwz	$acc12,`2048+128`($Tbl0)
	rlwinm	$acc06,$s1,`32-16`,24,31
	lwz	$acc13,`2048+160`($Tbl0)
	rlwinm	$acc07,$s2,`32-16`,24,31
	lwz	$acc14,`2048+192`($Tbl0)
	rlwinm	$acc08,$s2,`32-8`,24,31
	lwz	$acc15,`2048+224`($Tbl0)
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl2,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl2,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl2,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl2,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl2,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl2,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl2,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl2,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl2,$acc10
	rlwinm	$s2,$acc02,24,0,7
	lbzx	$acc11,$Tbl2,$acc11
	rlwinm	$s3,$acc03,24,0,7
	lbzx	$acc12,$Tbl2,$acc12
	rlwimi	$s0,$acc04,16,8,15
	lbzx	$acc13,$Tbl2,$acc13
	rlwimi	$s1,$acc05,16,8,15
	lbzx	$acc14,$Tbl2,$acc14
	rlwimi	$s2,$acc06,16,8,15
	lbzx	$acc15,$Tbl2,$acc15
	rlwimi	$s3,$acc07,16,8,15
	rlwimi	$s0,$acc08,8,16,23
	rlwimi	$s1,$acc09,8,16,23
	rlwimi	$s2,$acc10,8,16,23
	rlwimi	$s3,$acc11,8,16,23
	or	$s0,$s0,$acc12
	or	$s1,$s1,$acc13
	or	$s2,$s2,$acc14
	or	$s3,$s3,$acc15
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	xor	$s2,$s2,$t2
	xor	$s3,$s3,$t3
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
___
# Lppc_AES_decrypt_compact: byte-table decryption called by AES_decrypt.
# In 64-bit builds the two masks are replicated into the upper word.
# The routine continues past the end of this section of the file.
$code.=<<___;

.align	4
Lppc_AES_decrypt_compact:
	lwz	$acc00,240($key)
	addi	$Tbl1,$Tbl0,2048
	lwz	$t0,0($key)
	lis	$mask80,0x8080
	lwz	$t1,4($key)
	lis	$mask1b,0x1b1b
	lwz	$t2,8($key)
	ori	$mask80,$mask80,0x8080
	lwz	$t3,12($key)
	ori	$mask1b,$mask1b,0x1b1b
	addi	$key,$key,16
___
$code.=<<___ if ($SIZE_T==8);
	insrdi	$mask80,$mask80,32,0
	insrdi	$mask1b,$mask1b,32,0
___
$code.=<<___;
	mtctr	$acc00
.align	4
Ldec_compact_loop:
	xor	$s0,$s0,$t0
	xor	$s1,$s1,$t1
	rlwinm	$acc00,$s0,`32-24`,24,31
	xor	$s2,$s2,$t2
	rlwinm	$acc01,$s1,`32-24`,24,31
	xor	$s3,$s3,$t3
	rlwinm	$acc02,$s2,`32-24`,24,31
	rlwinm	$acc03,$s3,`32-24`,24,31
	rlwinm	$acc04,$s3,`32-16`,24,31
	rlwinm	$acc05,$s0,`32-16`,24,31
	rlwinm	$acc06,$s1,`32-16`,24,31
	rlwinm	$acc07,$s2,`32-16`,24,31
	lbzx	$acc00,$Tbl1,$acc00
	rlwinm	$acc08,$s2,`32-8`,24,31
	lbzx	$acc01,$Tbl1,$acc01
	rlwinm	$acc09,$s3,`32-8`,24,31
	lbzx	$acc02,$Tbl1,$acc02
	rlwinm	$acc10,$s0,`32-8`,24,31
	lbzx	$acc03,$Tbl1,$acc03
	rlwinm	$acc11,$s1,`32-8`,24,31
	lbzx	$acc04,$Tbl1,$acc04
	rlwinm	$acc12,$s1,`0`,24,31
	lbzx	$acc05,$Tbl1,$acc05
	rlwinm	$acc13,$s2,`0`,24,31
	lbzx	$acc06,$Tbl1,$acc06
	rlwinm	$acc14,$s3,`0`,24,31
	lbzx	$acc07,$Tbl1,$acc07
	rlwinm	$acc15,$s0,`0`,24,31
	lbzx	$acc08,$Tbl1,$acc08
	rlwinm	$s0,$acc00,24,0,7
	lbzx	$acc09,$Tbl1,$acc09
	rlwinm	$s1,$acc01,24,0,7
	lbzx	$acc10,$Tbl1,$acc10
	rlwinm
$s2,$acc02,24,0,7 1210 lbzx $acc11,$Tbl1,$acc11 1211 rlwinm $s3,$acc03,24,0,7 1212 lbzx $acc12,$Tbl1,$acc12 1213 rlwimi $s0,$acc04,16,8,15 1214 lbzx $acc13,$Tbl1,$acc13 1215 rlwimi $s1,$acc05,16,8,15 1216 lbzx $acc14,$Tbl1,$acc14 1217 rlwimi $s2,$acc06,16,8,15 1218 lbzx $acc15,$Tbl1,$acc15 1219 rlwimi $s3,$acc07,16,8,15 1220 rlwimi $s0,$acc08,8,16,23 1221 rlwimi $s1,$acc09,8,16,23 1222 rlwimi $s2,$acc10,8,16,23 1223 rlwimi $s3,$acc11,8,16,23 1224 lwz $t0,0($key) 1225 or $s0,$s0,$acc12 1226 lwz $t1,4($key) 1227 or $s1,$s1,$acc13 1228 lwz $t2,8($key) 1229 or $s2,$s2,$acc14 1230 lwz $t3,12($key) 1231 or $s3,$s3,$acc15 1232 1233 addi $key,$key,16 1234 bdz Ldec_compact_done 1235___ 1236$code.=<<___ if ($SIZE_T==8); 1237 # vectorized permutation improves decrypt performance by 10% 1238 insrdi $s0,$s1,32,0 1239 insrdi $s2,$s3,32,0 1240 1241 and $acc00,$s0,$mask80 # r1=r0&0x80808080 1242 and $acc02,$s2,$mask80 1243 srdi $acc04,$acc00,7 # r1>>7 1244 srdi $acc06,$acc02,7 1245 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 1246 andc $acc10,$s2,$mask80 1247 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 1248 sub $acc02,$acc02,$acc06 1249 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 1250 add $acc10,$acc10,$acc10 1251 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1252 and $acc02,$acc02,$mask1b 1253 xor $acc00,$acc00,$acc08 # r2 1254 xor $acc02,$acc02,$acc10 1255 1256 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 1257 and $acc06,$acc02,$mask80 1258 srdi $acc08,$acc04,7 # r1>>7 1259 srdi $acc10,$acc06,7 1260 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f 1261 andc $acc14,$acc02,$mask80 1262 sub $acc04,$acc04,$acc08 # r1-(r1>>7) 1263 sub $acc06,$acc06,$acc10 1264 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 1265 add $acc14,$acc14,$acc14 1266 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1267 and $acc06,$acc06,$mask1b 1268 xor $acc04,$acc04,$acc12 # r4 1269 xor $acc06,$acc06,$acc14 1270 1271 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 1272 and $acc10,$acc06,$mask80 1273 srdi 
$acc12,$acc08,7 # r1>>7 1274 srdi $acc14,$acc10,7 1275 sub $acc08,$acc08,$acc12 # r1-(r1>>7) 1276 sub $acc10,$acc10,$acc14 1277 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f 1278 andc $acc14,$acc06,$mask80 1279 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 1280 add $acc14,$acc14,$acc14 1281 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1282 and $acc10,$acc10,$mask1b 1283 xor $acc08,$acc08,$acc12 # r8 1284 xor $acc10,$acc10,$acc14 1285 1286 xor $acc00,$acc00,$s0 # r2^r0 1287 xor $acc02,$acc02,$s2 1288 xor $acc04,$acc04,$s0 # r4^r0 1289 xor $acc06,$acc06,$s2 1290 1291 extrdi $acc01,$acc00,32,0 1292 extrdi $acc03,$acc02,32,0 1293 extrdi $acc05,$acc04,32,0 1294 extrdi $acc07,$acc06,32,0 1295 extrdi $acc09,$acc08,32,0 1296 extrdi $acc11,$acc10,32,0 1297___ 1298$code.=<<___ if ($SIZE_T==4); 1299 and $acc00,$s0,$mask80 # r1=r0&0x80808080 1300 and $acc01,$s1,$mask80 1301 and $acc02,$s2,$mask80 1302 and $acc03,$s3,$mask80 1303 srwi $acc04,$acc00,7 # r1>>7 1304 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f 1305 srwi $acc05,$acc01,7 1306 andc $acc09,$s1,$mask80 1307 srwi $acc06,$acc02,7 1308 andc $acc10,$s2,$mask80 1309 srwi $acc07,$acc03,7 1310 andc $acc11,$s3,$mask80 1311 sub $acc00,$acc00,$acc04 # r1-(r1>>7) 1312 sub $acc01,$acc01,$acc05 1313 sub $acc02,$acc02,$acc06 1314 sub $acc03,$acc03,$acc07 1315 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1 1316 add $acc09,$acc09,$acc09 1317 add $acc10,$acc10,$acc10 1318 add $acc11,$acc11,$acc11 1319 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1320 and $acc01,$acc01,$mask1b 1321 and $acc02,$acc02,$mask1b 1322 and $acc03,$acc03,$mask1b 1323 xor $acc00,$acc00,$acc08 # r2 1324 xor $acc01,$acc01,$acc09 1325 xor $acc02,$acc02,$acc10 1326 xor $acc03,$acc03,$acc11 1327 1328 and $acc04,$acc00,$mask80 # r1=r2&0x80808080 1329 and $acc05,$acc01,$mask80 1330 and $acc06,$acc02,$mask80 1331 and $acc07,$acc03,$mask80 1332 srwi $acc08,$acc04,7 # r1>>7 1333 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f 1334 srwi $acc09,$acc05,7 1335 andc 
$acc13,$acc01,$mask80 1336 srwi $acc10,$acc06,7 1337 andc $acc14,$acc02,$mask80 1338 srwi $acc11,$acc07,7 1339 andc $acc15,$acc03,$mask80 1340 sub $acc04,$acc04,$acc08 # r1-(r1>>7) 1341 sub $acc05,$acc05,$acc09 1342 sub $acc06,$acc06,$acc10 1343 sub $acc07,$acc07,$acc11 1344 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1 1345 add $acc13,$acc13,$acc13 1346 add $acc14,$acc14,$acc14 1347 add $acc15,$acc15,$acc15 1348 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1349 and $acc05,$acc05,$mask1b 1350 and $acc06,$acc06,$mask1b 1351 and $acc07,$acc07,$mask1b 1352 xor $acc04,$acc04,$acc12 # r4 1353 xor $acc05,$acc05,$acc13 1354 xor $acc06,$acc06,$acc14 1355 xor $acc07,$acc07,$acc15 1356 1357 and $acc08,$acc04,$mask80 # r1=r4&0x80808080 1358 and $acc09,$acc05,$mask80 1359 srwi $acc12,$acc08,7 # r1>>7 1360 and $acc10,$acc06,$mask80 1361 srwi $acc13,$acc09,7 1362 and $acc11,$acc07,$mask80 1363 srwi $acc14,$acc10,7 1364 sub $acc08,$acc08,$acc12 # r1-(r1>>7) 1365 srwi $acc15,$acc11,7 1366 sub $acc09,$acc09,$acc13 1367 sub $acc10,$acc10,$acc14 1368 sub $acc11,$acc11,$acc15 1369 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f 1370 andc $acc13,$acc05,$mask80 1371 andc $acc14,$acc06,$mask80 1372 andc $acc15,$acc07,$mask80 1373 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1 1374 add $acc13,$acc13,$acc13 1375 add $acc14,$acc14,$acc14 1376 add $acc15,$acc15,$acc15 1377 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b 1378 and $acc09,$acc09,$mask1b 1379 and $acc10,$acc10,$mask1b 1380 and $acc11,$acc11,$mask1b 1381 xor $acc08,$acc08,$acc12 # r8 1382 xor $acc09,$acc09,$acc13 1383 xor $acc10,$acc10,$acc14 1384 xor $acc11,$acc11,$acc15 1385 1386 xor $acc00,$acc00,$s0 # r2^r0 1387 xor $acc01,$acc01,$s1 1388 xor $acc02,$acc02,$s2 1389 xor $acc03,$acc03,$s3 1390 xor $acc04,$acc04,$s0 # r4^r0 1391 xor $acc05,$acc05,$s1 1392 xor $acc06,$acc06,$s2 1393 xor $acc07,$acc07,$s3 1394___ 1395$code.=<<___; 1396 rotrwi $s0,$s0,8 # = ROTATE(r0,8) 1397 rotrwi $s1,$s1,8 1398 xor $s0,$s0,$acc00 # ^= r2^r0 
1399 rotrwi $s2,$s2,8 1400 xor $s1,$s1,$acc01 1401 rotrwi $s3,$s3,8 1402 xor $s2,$s2,$acc02 1403 xor $s3,$s3,$acc03 1404 xor $acc00,$acc00,$acc08 1405 xor $acc01,$acc01,$acc09 1406 xor $acc02,$acc02,$acc10 1407 xor $acc03,$acc03,$acc11 1408 xor $s0,$s0,$acc04 # ^= r4^r0 1409 rotrwi $acc00,$acc00,24 1410 xor $s1,$s1,$acc05 1411 rotrwi $acc01,$acc01,24 1412 xor $s2,$s2,$acc06 1413 rotrwi $acc02,$acc02,24 1414 xor $s3,$s3,$acc07 1415 rotrwi $acc03,$acc03,24 1416 xor $acc04,$acc04,$acc08 1417 xor $acc05,$acc05,$acc09 1418 xor $acc06,$acc06,$acc10 1419 xor $acc07,$acc07,$acc11 1420 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)] 1421 rotrwi $acc04,$acc04,16 1422 xor $s1,$s1,$acc09 1423 rotrwi $acc05,$acc05,16 1424 xor $s2,$s2,$acc10 1425 rotrwi $acc06,$acc06,16 1426 xor $s3,$s3,$acc11 1427 rotrwi $acc07,$acc07,16 1428 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24) 1429 rotrwi $acc08,$acc08,8 1430 xor $s1,$s1,$acc01 1431 rotrwi $acc09,$acc09,8 1432 xor $s2,$s2,$acc02 1433 rotrwi $acc10,$acc10,8 1434 xor $s3,$s3,$acc03 1435 rotrwi $acc11,$acc11,8 1436 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16) 1437 xor $s1,$s1,$acc05 1438 xor $s2,$s2,$acc06 1439 xor $s3,$s3,$acc07 1440 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8) 1441 xor $s1,$s1,$acc09 1442 xor $s2,$s2,$acc10 1443 xor $s3,$s3,$acc11 1444 1445 b Ldec_compact_loop 1446.align 4 1447Ldec_compact_done: 1448 xor $s0,$s0,$t0 1449 xor $s1,$s1,$t1 1450 xor $s2,$s2,$t2 1451 xor $s3,$s3,$t3 1452 blr 1453 .long 0 1454 .byte 0,12,0x14,0,0,0,0,0 1455.size .AES_decrypt,.-.AES_decrypt 1456 1457.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>" 1458.align 7 1459___ 1460 1461$code =~ s/\`([^\`]*)\`/eval $1/gem; 1462print $code; 1463close STDOUT or die "error closing STDOUT: $!"; 1464