📄 atl_cncmmijk.c
字号:
} else { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } } mm_fixedKcu=Mjoin(Mjoin(Mjoin(NCmm00,Mjoin(0x0x,KB)),TT),0x0x0_aX_bX); mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TT),0x0x0_aX_bX); } } else { if (TB == AtlasNoTrans) { ar0 = ai0 = ai1 = ralpha; if (AlphaIsOne) { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_b1); if (BetaIsOne) { r1mm = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_bX); i1mm = i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_b1); } else if (BetaIsZero) { i0mm_bX = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_b1); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_bX); } else { i0mm_bX=r1mm=r0mm_bX=Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_a1_b1); } } else { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); if (BetaIsOne) { r1mm = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); i1mm = i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); } else if (BetaIsZero) { i0mm_bX = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); } else { i0mm_bX=r1mm=r0mm_bX=Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TN),0x0x0),_aX_bX); } } mm_fixedKcu=Mjoin(Mjoin(Mjoin(NCmm00,Mjoin(0x0x,KB)),TN),0x0x0_aX_bX); mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TN),0x0x0_aX_bX); } else if (TB == AtlasConjTrans) { ai1 = ar0 = -ralpha; ai0 = ralpha; if (AlphaIsOne) { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); if (BetaIsOne) { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else if (BetaIsZero) { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_b0); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } } else { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); if (BetaIsOne) { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else if (BetaIsZero) { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_b0); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else { r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } } mm_fixedKcu=Mjoin(Mjoin(Mjoin(NCmm00,Mjoin(0x0x,KB)),TT),0x0x0_aX_bX); mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TT),0x0x0_aX_bX); } else { ar0 = ai0 = ai1 = ralpha; if (AlphaIsOne) { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); if (BetaIsOne) { r1mm = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i1mm = i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); } else if (BetaIsZero) { i0mm_bX = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); } else { i0mm_bX=r1mm=r0mm_bX=Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_a1_b1); } } else { r0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i0mm_b1 = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); if (BetaIsOne) { r1mm = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i1mm = i0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else if (BetaIsZero) { i0mm_bX = r0mm_bX = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_b0); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); r1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } else { i0mm_bX=r1mm=r0mm_bX=Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); i1mm = Mjoin(Mjoin(Mjoin(NCmm0,TT),0x0x0),_aX_bX); } } mm_fixedKcu=Mjoin(Mjoin(Mjoin(NCmm00,Mjoin(0x0x,KB)),TT),0x0x0_aX_bX); mmcu = Mjoin(Mjoin(Mjoin(NCmm00,0x0x0),TT),0x0x0_aX_bX); } } if (TA == AtlasNoTrans) { incAk = lda * (KB<<1); incAn = -Kb * incAk; incAm = MB<<1; } else { incAk = KB<<1; incAn = -Kb * incAk; incAm = (MB<<1) * lda; } if (TB == AtlasNoTrans) { incBk = KB<<1; incBn = (ldb*NB - K + kr)<<1; incBm = -((Nb * ldb * NB)<<1); } else { incBk = (KB<<1)*ldb; incBn = (NB<<1) - Kb*incBk; incBm = -Nb*(NB<<1); } for (i=Mb; i; i--, a += incAm, b += incBm, c += incCm) { for (j=Nb; j; j--, a += incAn, b += incBn, c += incCn) { if (Kb) { r0mm_bX(MB, NB, KB, ar0, a+1, lda, b+1, ldb, nrbeta, c, ldc); i0mm_bX(MB, NB, KB, ai0, a+1, lda, b, ldb, rbeta, c+1, ldc); r1mm(MB, NB, KB, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); i1mm(MB, NB, KB, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); a += incAk; b += incBk; for (k=Kb-1; k; k--, a += incAk, b += incBk) { r0mm_b1(MB, NB, KB, ar0, a+1, lda, b+1, ldb, ATL_rnone, c, ldc); i0mm_b1(MB, NB, KB, ai0, a+1, lda, b, ldb, ATL_rone, c+1, ldc); r1mm(MB, NB, KB, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); i1mm(MB, NB, KB, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } if (kr) { mmcu(MB, NB, kr, ar0, a+1, lda, b+1, ldb, ATL_rnone, c, ldc); mmcu(MB, NB, kr, ai0, a+1, lda, b, ldb, ATL_rone, c+1, ldc); mmcu(MB, NB, kr, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mmcu(MB, NB, kr, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } } else if (kr) { if (BetaIsZero) Mjoin(PATL,gezero)(MB, NB, c, ldc); mmcu(MB, NB, kr, ar0, a+1, lda, b+1, ldb, nrbeta, c, ldc); mmcu(MB, NB, kr, ai0, a+1, lda, b, ldb, rbeta, c+1, ldc); mmcu(MB, NB, kr, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mmcu(MB, NB, kr, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } } } if (mr) /* M-loop remainder */ { for (j=Nb; j; j--, a += incAn, b += incBn, c += incCn) { if (BetaIsZero) Mjoin(PATL,gezero)(mr, NB, c, ldc); if (Kb) { mm_fixedKcu(mr, NB, KB, ar0, a+1, lda, b+1, ldb, nrbeta, c, ldc); mm_fixedKcu(mr, NB, KB, ai0, a+1, lda, b, ldb, rbeta, c+1, ldc); mm_fixedKcu(mr, NB, KB, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mm_fixedKcu(mr, NB, KB, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); a += incAk; b += incBk; for (k=Kb-1; k; k--, a += incAk, b += incBk) { mm_fixedKcu(mr, NB, KB, ar0, a+1, lda, b+1, ldb, ATL_rnone, c, ldc); mm_fixedKcu(mr, NB, KB, ai0, a+1, lda, b, ldb, ATL_rone, c+1, ldc); mm_fixedKcu(mr, NB, KB, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mm_fixedKcu(mr, NB, KB, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } if (kr) { mmcu(mr, NB, kr, ar0, a+1, lda, b+1, ldb, ATL_rnone, c, ldc); mmcu(mr, NB, kr, ai0, a+1, lda, b, ldb, ATL_rone, c+1, ldc); mmcu(mr, NB, kr, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mmcu(mr, NB, kr, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } } else if (kr) { mmcu(mr, NB, kr, ar0, a+1, lda, b+1, ldb, nrbeta, c, ldc); mmcu(mr, NB, kr, ai0, a+1, lda, b, ldb, rbeta, c+1, ldc); mmcu(mr, NB, kr, ralpha, a, lda, b, ldb, ATL_rnone, c, ldc); mmcu(mr, NB, kr, ai1, a, lda, b+1, ldb, ATL_rone, c+1, ldc); } } } if (nr) Mjoin(PATL,NCmmJIK)(TA, TB, M, nr, K, alpha, A, lda, B+Nb*(incBn+Kb*incBk), ldb, beta, C+Nb*(NB<<1)*ldc, ldc); return(0);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -