#! /usr/bin/env perl
# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# sha1_block for Thumb.
#
# January 2007.
#
# The code does not present direct interest to OpenSSL, because of low
# performance. Its purpose is to establish _size_ benchmark. Pretty
# useless one I must say, because 30% or 88 bytes larger ARMv4 code
# [available on demand] is almost _twice_ as fast. It should also be
# noted that in-lining of .Lcommon and .Lrotate improves performance
# by over 40%, while code increases by only 10% or 32 bytes. But once
# again, the goal was to establish _size_ benchmark, not performance.

# Usage: perl <this script> [output-file]
#
# The script builds the assembly text for sha1_block_data_order in $code
# and prints it.  The last command-line argument, when present and true,
# names the file that receives the output; otherwise the text goes to the
# inherited standard output.

use strict;
use warnings;

# Redirect STDOUT to the requested file.  Fail loudly if it cannot be
# opened, so that a build never continues with a missing output file.
my $output = pop @ARGV;
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}

# Build-time switches.
my $inline            = 0;  # true: expand common()/rotate() in every round
                            # instead of emitting "bl" to a shared copy
my $cheat_on_binutils = 0;  # true: emit a ".code 32" stub that switches
                            # to the Thumb ISA at function entry

# Register allocation.  The SHA-1 working variables use upper-case Perl
# names so that Perl's special sort variables $a/$b are left alone.
my ($t0, $t1, $t2)       = ("r0", "r1", "r2");
my ($A, $B, $C, $D, $E)  = ("r3", "r4", "r5", "r6", "r7");
my $K   = "r8";     # "upper" registers can be used in add/sub and mov insns
my $ctx = "r9";
my $inp = "r10";
my $len = "r11";
my $Xi  = "r12";

# Accumulates the generated assembly; appended to by the BODY_* subs.
my $code;

# Returns the first half of a round: step $t0 down one word, fetch that
# word into $t1, and form $t2 = E + K + ROR(A,27) + X[i].
sub common {
    return <<___;
	sub	$t0,#4
	ldr	$t1,[$t0]
	add	$E,$K			@ E+=K_xx_xx
	lsl	$t2,$A,#5
	add	$t2,$E
	lsr	$E,$A,#27
	add	$t2,$E			@ E+=ROR(A,27)
	add	$t2,$t1			@ E+=X[i]
___
}

# Returns the second half of a round: shift the working variables
# (E=D, D=C, C=ROR(B,2), B=A) and set A = $t2 + $t1, where $t1 holds the
# round function value computed by the caller.
sub rotate {
    return <<___;
	mov	$E,$D			@ E=D
	mov	$D,$C			@ D=C
	lsl	$C,$B,#30
	lsr	$B,$B,#2
	orr	$C,$B			@ C=ROR(B,2)
	mov	$B,$A			@ B=A
	add	$A,$t2,$t1		@ A=E+F_xx_xx(B,C,D)
___
}

# Appends one round for rounds 0..19 to $code: F = ((C ^ D) & B) ^ D.
sub BODY_00_19 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$C
	eor	$t1,$D
	and	$t1,$B
	eor	$t1,$D			@ F_00_19(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
    return;
}

# Appends one round for rounds 20..39 (the same code is re-entered for
# rounds 60..79 with a different K): F = B ^ C ^ D.
sub BODY_20_39 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$B
	eor	$t1,$C
	eor	$t1,$D			@ F_20_39(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
    return;
}

# Appends one round for rounds 40..59: F = (B & C) | ((B | C) & D).
# $E is free to use as scratch here because common() has already folded
# it into $t2.
sub BODY_40_59 {
    $code .= $inline ? common() : "\tbl .Lcommon\n";
    $code .= <<___;
	mov	$t1,$B
	and	$t1,$C
	mov	$E,$B
	orr	$E,$C
	and	$E,$D
	orr	$t1,$E			@ F_40_59(B,C,D)
___
    $code .= $inline ? rotate() : "\tbl .Lrotate\n";
    return;
}

# Section header and entry label.
$code = <<___;
.text
.code	16

.global	sha1_block_data_order
.type	sha1_block_data_order,%function

.align	2
sha1_block_data_order:
___

# Optional stub that switches to the Thumb ISA at function entry.
if ($cheat_on_binutils) {
    $code .= <<___;
.code	32
	add	r3,pc,#1
	bx	r3			@ switch to Thumb ISA
.code	16
___
}

# Prologue and message schedule:
#  - save r4-r7, r8-r12 (via r3-r7) and lr;
#  - scale r2 by 64 and add inp, giving the end-of-input pointer
#    (r2 is presumably the number of 64-byte blocks);
#  - .LXload pushes 16 words assembled big-endian from input bytes;
#  - .LXupdate pushes 64 more words, each the XOR of four earlier words
#    rotated right by 31;
#  - load the five working variables from ctx and set up rounds 0..19,
#    with $t0 at the top of the word array and $Xi 20 words below it.
$code .= <<___;
	push	{r4-r7}
	mov	r3,r8
	mov	r4,r9
	mov	r5,r10
	mov	r6,r11
	mov	r7,r12
	push	{r3-r7,lr}
	lsl	r2,#6
	mov	$ctx,r0			@ save context
	mov	$inp,r1			@ save inp
	mov	$len,r2			@ save len
	add	$len,$inp		@ $len to point at inp end

.Lloop:
	mov	$Xi,sp
	mov	$t2,sp
	sub	$t2,#16*4		@ [3]
.LXload:
	ldrb	$A,[$t1,#0]		@ $t1 is r1 and holds inp
	ldrb	$B,[$t1,#1]
	ldrb	$C,[$t1,#2]
	ldrb	$D,[$t1,#3]
	lsl	$A,#24
	lsl	$B,#16
	lsl	$C,#8
	orr	$A,$B
	orr	$A,$C
	orr	$A,$D
	add	$t1,#4
	push	{$A}
	cmp	sp,$t2
	bne	.LXload			@ [+14*16]

	mov	$inp,$t1		@ update $inp
	sub	$t2,#32*4
	sub	$t2,#32*4
	mov	$E,#31			@ [+4]
.LXupdate:
	ldr	$A,[sp,#15*4]
	ldr	$B,[sp,#13*4]
	ldr	$C,[sp,#7*4]
	ldr	$D,[sp,#2*4]
	eor	$A,$B
	eor	$A,$C
	eor	$A,$D
	ror	$A,$E
	push	{$A}
	cmp	sp,$t2
	bne	.LXupdate		@ [+(11+1)*64]

	ldmia	$t0!,{$A,$B,$C,$D,$E}	@ $t0 is r0 and holds ctx
	mov	$t0,$Xi

	ldr	$t2,.LK_00_19
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+7+4]
.L_00_19:
___
BODY_00_19();

# Close the 0..19 loop (runs until $t0 has stepped down to $Xi), then set
# up the next 20-word window and K for rounds 20..39.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_00_19		@ [+(2+9+4+2+8+2)*20]

	ldr	$t2,.LK_20_39
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_20_39_or_60_79:
___
BODY_20_39();

# Close the shared 20..39 / 60..79 loop.  When $t0 has reached sp every
# pushed word has been consumed and control leaves for .Ldone; otherwise
# set up rounds 40..59.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_20_39_or_60_79	@ [+(2+9+3+2+8+2)*20*2]
	cmp	sp,$t0
	beq	.Ldone			@ [+2]

	ldr	$t2,.LK_40_59
	mov	$t1,$t0
	sub	$t1,#20*4
	mov	$Xi,$t1
	mov	$K,$t2			@ [+5]
.L_40_59:
___
BODY_40_59();

# Close the 40..59 loop and re-enter the 20..39 code with K_60_79 and
# $Xi = sp for the last 20 rounds.  .Ldone adds the working variables to
# the five context words and stores them back, pops the 80-word frame and
# loops while inp has not reached the end pointer.  .Lexit restores the
# saved registers and returns.
$code .= <<___;
	cmp	$Xi,$t0
	bne	.L_40_59		@ [+(2+9+6+2+8+2)*20]

	ldr	$t2,.LK_60_79
	mov	$Xi,sp
	mov	$K,$t2
	b	.L_20_39_or_60_79	@ [+4]
.Ldone:
	mov	$t0,$ctx
	ldr	$t1,[$t0,#0]
	ldr	$t2,[$t0,#4]
	add	$A,$t1
	ldr	$t1,[$t0,#8]
	add	$B,$t2
	ldr	$t2,[$t0,#12]
	add	$C,$t1
	ldr	$t1,[$t0,#16]
	add	$D,$t2
	add	$E,$t1
	stmia	$t0!,{$A,$B,$C,$D,$E}	@ [+20]

	add	sp,#80*4		@ deallocate stack frame
	mov	$t0,$ctx		@ restore ctx
	mov	$t1,$inp		@ restore inp
	cmp	$t1,$len
	beq	.Lexit
	b	.Lloop			@ [+6] total 3212 cycles
.Lexit:
	pop	{r2-r7}
	mov	r8,r2
	mov	r9,r3
	mov	r10,r4
	mov	r11,r5
	mov	r12,r6
	mov	lr,r7
	pop	{r4-r7}
	bx	lr
.align	2
___

# Out-of-line copies of the shared round halves, needed only when the
# rounds reach them through "bl" rather than having them expanded inline.
if (!$inline) {
    $code .= ".Lcommon:\n" . common() . "\tmov pc,lr\n";
    $code .= ".Lrotate:\n" . rotate() . "\tmov pc,lr\n";
}

# Round constants, symbol size and identification string.
$code .= <<___;
.align	2
.LK_00_19:	.word	0x5a827999
.LK_20_39:	.word	0x6ed9eba1
.LK_40_59:	.word	0x8f1bbcdc
.LK_60_79:	.word	0xca62c1d6
.size	sha1_block_data_order,.-sha1_block_data_order
.asciz	"SHA1 block transform for Thumb, CRYPTOGAMS by <appro\@openssl.org>"
___

print $code;
close STDOUT or die "error closing STDOUT: $!"; # enforce flush