Lines Matching refs:T0
346 my ($H0,$H1,$H2,$H3,$H4, $T0,$T1,$T2,$T3,$T4, $D0,$D1,$D2,$D3,$D4, $MASK) =
814 vmovdqu 16*2($inp),$T0
818 vpsrldq \$6,$T0,$T2 # splat input
820 vpunpckhqdq $T1,$T0,$T4 # 4
821 vpunpcklqdq $T1,$T0,$T0 # 0:1
825 vpsrlq \$26,$T0,$T1
826 vpand $MASK,$T0,$T0 # 0
907 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
928 vpmuludq $T0,$H2,$H2 # h0*r1
935 vpmuludq $T0,$H3,$H3 # h0*r2
947 vpmuludq $T0,$H2,$H2 # h0*r3
967 vpmuludq 0x70(%rsp),$T0,$T4 # h0*r4
968 vpmuludq $T1,$H4,$T0 # h1*s4
972 vpaddq $T0,$D0,$D0 # d0 += h1*s4
1009 vpmuludq $H0,$T4,$T0 # h0*r0
1011 vpaddq $T0,$D0,$D0
1014 vpmuludq $H2,$T4,$T0 # h2*r0
1016 vpaddq $T0,$D2,$D2
1019 vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
1022 vpaddq $T0,$D0,$D0 # d0 += h4*s1
1024 vpmuludq $H3,$T2,$T0 # h3*r1
1027 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1034 vpmuludq $H2,$T3,$T0 # h2*r2
1036 vpaddq $T0,$D4,$D4 # d4 += h2*r2
1040 vpmuludq $H4,$T4,$T0 # h4*s2
1042 vpaddq $T0,$D1,$D1 # d1 += h4*s2
1051 vpmuludq $H4,$T3,$T0 # h4*s3
1053 vpaddq $T0,$D2,$D2 # d2 += h4*s3
1054 vmovdqu 16*2($inp),$T0 # load input
1063 vpsrldq \$6,$T0,$T2 # splat input
1071 vpunpckhqdq $T1,$T0,$T4 # 4
1075 vpunpcklqdq $T1,$T0,$T0 # 0:1
1080 vpsrlq \$26,$T0,$T1
1082 vpand $MASK,$T0,$T0 # 0
1137 vpaddq $H0,$T0,$T0
1156 vpmuludq $T0,$D4,$D0 # d0 = h0*r0
1170 vpmuludq $T0,$H2,$H2 # h0*r1
1181 vpmuludq $T0,$H4,$H4 # h0*r2
1191 vpmuludq $T0,$H3,$H3 # h0*r3
1202 vpmuludq $T0,$H2,$H2 # h0*r4
1244 vpmuludq $H0,$T4,$T0 # h0*r0
1245 vpaddq $T0,$D0,$D0 # d0 += h0*r0
1248 vpmuludq $H2,$T4,$T0 # h2*r0
1249 vpaddq $T0,$D2,$D2 # d2 += h2*r0
1256 vpmuludq $H3,$T2,$T0 # h3*r1
1257 vpaddq $T0,$D4,$D4 # d4 += h3*r1
1262 vpmuludq $H1,$T2,$T0 # h1*r1
1263 vpaddq $T0,$D2,$D2 # d2 += h1*r1
1272 vpmuludq $H1,$T4,$T0 # h1*r2
1273 vpaddq $T0,$D3,$D3 # d3 += h1*r2
1283 vpmuludq $H1,$T3,$T0 # h1*r3
1284 vpaddq $T0,$D4,$D4 # d4 += h1*r3
1291 vpmuludq $H3,$T4,$T0 # h3*s3
1292 vpaddq $T0,$D1,$D1 # d1 += h3*s3
1300 vpmuludq $H3,$T3,$T0 # h3*s4
1301 vpaddq $T0,$D2,$D2 # d2 += h3*s4
1314 vpsrldq \$8,$D0,$T0
1318 vpaddq $T0,$D0,$D0
1446 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
1754 vmovdqa 96(%rcx),$T0 # .Lpermd_avx2
1766 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1768 vpermd $T3,$T0,$T3
1770 vpermd $T4,$T0,$T4
1772 vpermd $D0,$T0,$D0
1774 vpermd $D1,$T0,$D1
1776 vpermd $D2,$T0,$D2
1778 vpermd $D3,$T0,$D3
1780 vpermd $D4,$T0,$D4
1782 vpermd $MASK,$T0,$MASK
1790 vmovdqu 16*0($inp),%x#$T0
1792 vinserti128 \$1,16*2($inp),$T0,$T0
1796 vpsrldq \$6,$T0,$T2 # splat input
1798 vpunpckhqdq $T1,$T0,$T4 # 4
1800 vpunpcklqdq $T1,$T0,$T0 # 0:1
1804 vpsrlq \$26,$T0,$T1
1807 vpand $MASK,$T0,$T0 # 0
1827 vpaddq $H0,$T0,$H0
1828 vmovdqa `32*0`(%rsp),$T0 # r0^4
1852 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
1868 vpmuludq $H0,$T0,$T4 # h0*r0
1869 vpmuludq $H1,$T0,$H2 # h1*r0
1872 vpmuludq $H3,$T0,$T4 # h3*r0
1873 vpmuludq $H4,$T0,$H2 # h4*r0
1874 vmovdqu 16*0($inp),%x#$T0 # load input
1877 vinserti128 \$1,16*2($inp),$T0,$T0
1894 vpsrldq \$6,$T0,$T2 # splat input
1902 vpunpckhqdq $T1,$T0,$T4 # 4
1906 vpunpcklqdq $T1,$T0,$T0 # 0:1
1941 vpsrlq \$26,$T0,$T1
1960 vpand $MASK,$T0,$T0 # 0
1977 vpaddq $H0,$T0,$H0
1978 vmovdqu `32*0+4`(%rsp),$T0 # r0^4
1987 vpmuludq $H2,$T0,$D2 # d2 = h2*r0
2002 vpmuludq $H0,$T0,$T4 # h0*r0
2003 vpmuludq $H1,$T0,$H2 # h1*r0
2007 vpmuludq $H3,$T0,$T4 # h3*r0
2008 vpmuludq $H4,$T0,$H2 # h4*r0
2048 vpsrldq \$8,$H0,$T0
2053 vpaddq $T0,$H0,$H0
2057 vpermq \$0x2,$H0,$T0
2062 vpaddq $T0,$H0,$H0
2142 map(s/%y/%z/,($T4,$T0,$T1,$T2,$T3)); # switch to %zmm domain
2187 vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
2197 vpermd $T0,$T2,$S1
2200 vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
2225 vpmuludq $T0,$R0,$D0 # d0 = r0'*r0
2226 vpmuludq $T0,$R1,$D1 # d1 = r0'*r1
2227 vpmuludq $T0,$R2,$D2 # d2 = r0'*r2
2228 vpmuludq $T0,$R3,$D3 # d3 = r0'*r3
2229 vpmuludq $T0,$R4,$D4 # d4 = r0'*r4
2322 vpunpcklqdq $T4,$T3,$T0 # transpose input
2357 vpsrlq \$52,$T0,$T2 # splat input
2360 vpsrlq \$26,$T0,$T1
2364 vpandq $MASK,$T0,$T0 # 0
2405 vpaddq $H0,$T0,$H0
2438 vpunpcklqdq $T4,$T3,$T0 # transpose input
2471 vpsrlq \$52,$T0,$T2 # splat input
2498 vpsrlq \$26,$T0,$T1
2516 vpandq $MASK,$T0,$T0 # 0
2545 vpaddq $H0,$T0,$H0
2559 vmovdqu 16*0($inp),%x#$T0
2579 vinserti128 \$1,16*2($inp),%y#$T0,%y#$T0
2646 map(s/%z/%y/,($T0,$T1,$T2,$T3,$T4, $PADBIT));
2654 vpsrldq \$6,$T0,$T2 # splat input
2656 vpunpckhqdq $T1,$T0,$T4 # 4
2662 vpunpcklqdq $T1,$T0,$T0 # 0:1
2676 vpsrlq \$26,$T0,$T1
2683 vpand $MASK,$T0,$T0 # 0
2809 my ($T0,$inp_permd,$inp_shift,$PAD) = map("%ymm$_",(18..21));
2864 vmovdqu32 0($inp),%x#$T0 # load input as ----3210
2867 vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
2868 vpsrlvq $inp_shift,$T0,$T0
2869 vpandq $reduc_mask,$T0,$T0
2870 vporq $PAD,$T0,$T0
2872 vpaddq $T0,$Dlo,$Dlo # accumulate input
2890 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost qword
2894 vpaddq $T0,$Dhi,$Dhi
2900 vpsrlvq $reduc_rght,$Dlo,$T0 # 0 in topmost word
2903 vpermq \$0b10010011,$T0,$T0
2905 vpaddq $T0,$Dlo,$Dlo
2907 vpermq \$0b10010011,$Dlo,${T0}{%k1}{z}
2909 vpaddq $T0,$Dlo,$Dlo
2910 vpsllq \$2,$T0,$T0
2912 vpaddq $T0,$Dlo,$Dlo
2936 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
2993 vpandq $mask44,$T1,$T0
3163 vpandq $mask44,$T1,$T0
3175 vpaddq $T0,$H0,$H0
3226 vpandq $mask44,$T1,$T0
3264 vpaddq $T0,$H0,$H0
3299 vpsrldq \$8,$D0lo,$T0
3303 vpaddq $T0,$D0lo,$D0lo
3309 vpermq \$0x2,$D0lo,$T0
3316 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}
3379 my ($T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD) = map("%ymm$_",(24..31));
3484 vpunpcklqdq $R0,$RR0,$T0
3492 map(s/%y/%z/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3497 vshufi64x2 \$0x44,$R0,$T0,$RR0
3529 vpandq $mask44,$T1,$T0
3542 vpaddq $T0,$H0,$H0
3593 vpandq $mask44,$T1,$T0
3621 vpaddq $T0,$H0,$H0
3656 vpsrldq \$8,$D0lo,$T0
3660 vpaddq $T0,$D0lo,$D0lo
3666 vpermq \$0x2,$D0lo,$T0
3673 vpaddq $T0,$D0lo,$D0lo
3679 vextracti64x4 \$1,$D0lo,%y#$T0
3692 map(s/%z/%y/, $T0,$T1,$T2,$T3,$mask44,$mask42,$tmp,$PAD);
3695 vpaddq $T0,$D0lo,${D0lo}{%k1}{z}