⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 atl_agemmxx.c

📁 基于Blas CLapck的.用过的人知道是干啥的
💻 C
字号:
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 1997 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include "atlas_misc.h"#include "atlas_lvl3.h"#include <stddef.h>#ifdef NoTransA_   #define ETA AtlasNoTrans   #define TA_ N   #define ta_ n   #define nA_ K   #define incA (NB SHIFT)   #define A2BLK Mjoin(PATL,row2blkT)   #define A_M_ M   #define A_N_ K#elif defined(ConjTransA_)   #define ETA AtlasConjTrans   #define TA_ C   #define ta_ c   #define nA_ M   #define incA ATL_MulByNB(lda<<1)   #define A2BLK Mjoin(PATL,col2blkConj)   #define A_M_ K   #define A_N_ M#else   #define ETA AtlasTrans   #define TA_ T   #define ta_ t   #define nA_ M   #define incA ATL_MulByNB(lda SHIFT)   #define A2BLK Mjoin(PATL,col2blk)   #define A_M_ K   #define A_N_ M#endif#ifdef NoTransB_   #define ETB AtlasNoTrans   #define TB_ N   #define tb_ n   #define incB ATL_MulByNB(ldb SHIFT)   #define nB_ N   #define B2BLK Mjoin(PATL,col2blk)   #define B_M_ K   #define B_N_ N#elif defined(ConjTransB_)   #define ETB AtlasConjTrans   #define TB_ C   #define tb_ c   #define nB_ K   #define B2BLK Mjoin(PATL,row2blkC)   #define B_M_ N   #define B_N_ K   #define incB NB2#else   #define ETB AtlasTrans   #define TB_ T   #define tb_ t   #define nB_ K   #define B2BLK Mjoin(PATL,row2blkT)   #define B_M_ N   #define B_N_ K   #define incB (NB SHIFT)#endif#define tatb Mjoin(ta_,tb_)#define TATB Mjoin(TA_,TB_)void Mjoin(Mjoin(PATL,aliased_gemm),TATB)   (const int M, const int N, const int K, const SCALAR alpha,    const TYPE *A, const int lda, const TYPE *B, const int ldb,    const SCALAR beta, TYPE *C, const int ldc)/* * Does right thing when output matrix is aliased with one or more of the input * matrices */{   const ptrdiff_t  bA=(ptrdiff_t)A, bB=(ptrdiff_t)B, bC=(ptrdiff_t)C;   const ptrdiff_t eA = bA + ATL_MulBySize(nA_*lda);   const ptrdiff_t eB = bB + ATL_MulBySize(nB_*ldb);   const ptrdiff_t eC = bC + ATL_MulBySize(N*ldc);   const int AC_aliased = ( (bA <= bC && eA >= bC) || (bC <= bA && eC >= bA) );   const int BC_aliased = ( (bB <= bC && eB >= bC) || (bC <= bB && eC >= bB) );   const int nMb=ATL_DivByNB(M), nNb=ATL_DivByNB(N), nKb=ATL_DivByNB(K);   const int ib = M - ATL_MulByNB(nMb);   const int jb = N - ATL_MulByNB(nNb);   const int kb = K - ATL_MulByNB(nKb);   void *vA=NULL, *vB=NULL;   TYPE *pA, *pB;   NBMM0 NBmm0;   MAT2BLK mat2blk;   #ifdef TCPLX      MATSCAL gescal;   #endif#ifdef USERGEMM   int ldpb=ldb, ldpa=lda;   pA = (TYPE *) A; pB = (TYPE *) B;   if (BC_aliased)   {      vB = malloc(ATL_Cachelen + ATL_MulBySize(N*K));      ATL_assert(vB);      pB = ATL_AlignPtr(vB);      #ifdef NoTransB_         Mjoin(PATL,gecopy)(K, N, B, ldb, pB, K);         ldpb = K;      #else         Mjoin(PATL,gecopy)(N, K, B, ldb, pB, N);         ldpb = N;      #endif    }    if (AC_aliased)    {       vA = malloc(ATL_Cachelen + ATL_MulBySize(M*K));       ATL_assert(vA);       pA = ATL_AlignPtr(vA);       #ifdef NoTransA_          Mjoin(PATL,gecopy)(M, K, A, lda, pA, M);          ldpa = M;       #else          Mjoin(PATL,gecopy)(K, M, A, lda, pA, K);          ldpa = K;       #endif    }    if (Mjoin(PATU,usergemm)(ETA, ETB, M, N, K, alpha, pA, ldpa, pB, ldpb,                             beta, C, ldc))       ATL_assert(Mjoin(PATL,NCmmJIK)(ETA, ETB, M, N, K, alpha, pA, ldpa,                                      pB, ldpb, beta, C, ldc)==0);#else   #ifdef TCPLX      if (beta[1] == ATL_rzero)      {         gescal = NULL;         if (*beta == ATL_rone) NBmm0 = Mjoin(PATL,CNBmm_b1);         else if (*beta == ATL_rzero) NBmm0 = Mjoin(PATL,CNBmm_b0);         else NBmm0 = Mjoin(PATL,CNBmm_bX);      }      else      {         NBmm0 = Mjoin(PATL,CNBmm_b1);         gescal = Mjoin(PATL,gescal_bX);      }   #else      if ( SCALAR_IS_ONE(beta) ) NBmm0 = NBmm_b1;      else if ( SCALAR_IS_ZERO(beta) ) NBmm0 = NBmm_b0;      else NBmm0 = NBmm_bX;   #endif   if (N >= M)  /* B on outside, A in inner loop */   {/* *    If B & C are aliased, we will need to copy entire matrix up front *    unless B is NoTranspose, and B & C start at the same place */      if (BC_aliased)      {         #ifdef NoTransB_            if (B != C || ldb != ldc)            {         #endif         vB = malloc(ATL_Cachelen + ATL_MulBySize(N*K));         ATL_assert(vB);         pB = ATL_AlignPtr(vB);         Mjoin(B2BLK,2_a1)(B_M_, B_N_, B, ldb, pB, alpha);         mat2blk = NULL;         B = NULL;         #ifdef NoTransB_            }         #endif      }/* *    Otherwise, we can copy B column panel at a time */      if (vB == NULL)      {/* *       See if we can avoid the copy of B */         #if defined(NoTransB_) && defined(TREAL)            if (!BC_aliased && ldb == NB && K == NB)            {               pB = (TYPE *) B;               mat2blk = NULL;               B = NULL;            }            else            {         #endif         vB = malloc(ATL_Cachelen + ATL_MulBySize(NB*K));         ATL_assert(vB);         pB = ATL_AlignPtr(vB);         mat2blk = Mjoin(B2BLK,_a1);         #if defined(NoTransB_) && defined(TREAL)            }         #endif      }/* *    See if it is possible to avoid copying A */      #if !defined(NoTransA_) && defined(TREAL)         if ( !AC_aliased && lda == NB && K == NB && SCALAR_IS_ONE(alpha) )            pA = (TYPE *) A;         else         {      #endif         vA = malloc(ATL_Cachelen + ATL_MulBySize(M*K));         ATL_assert(vA);         pA = ATL_AlignPtr(vA);         if ( SCALAR_IS_ONE(alpha) )            Mjoin(A2BLK,2_a1)(A_M_, A_N_, A, lda, pA, alpha);         else Mjoin(A2BLK,2_aX)(A_M_, A_N_, A, lda, pA, alpha);      #if !defined(NoTransA_) && defined(TREAL)         }      #endif      #ifdef TREAL         Mjoin(PATL,mmJIK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, pA,                              B, ldb, pB, incB, mat2blk, beta,                              C, ldc, C, NULL, NBmm0);      #else         Mjoin(PATL,mmJIK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, pA,                            B, ldb, pB, incB, mat2blk, beta,                            C, ldc, gescal, NBmm0);      #endif   }   else  /* put A as outer matrix, B as inner */   {/* *    If A & C are aliased, we will need to copy entire matrix up front *    unless A is NoTranspose, and A & C start at the same place, with the same *    stride between columns */      if (AC_aliased)      {         #ifdef NoTransA_            if (A != C || lda != ldc)            {         #endif         vA = malloc(ATL_Cachelen + ATL_MulBySize(M*K));         ATL_assert(vA);         pA = ATL_AlignPtr(vA);         Mjoin(A2BLK,2_a1)(A_M_, A_N_, A, lda, pA, alpha);         mat2blk = NULL;         A = NULL;         #ifdef NoTransA_            }         #endif      }/* *    Otherwise, we can copy A row panel at a time */      if (vA == NULL)      {/* *       See if we can avoid the copy of A */         #if !defined(NoTransA_) && defined(TREAL)            if (!AC_aliased && lda == NB && K == NB)            {               pA = (TYPE *) A;               mat2blk = NULL;               A = NULL;            }            else            {         #endif         vA = malloc(ATL_Cachelen + ATL_MulBySize(NB*K));         ATL_assert(vA);         pA = ATL_AlignPtr(vA);         mat2blk = Mjoin(A2BLK,_a1);         #if !defined(NoTransA_) && defined(TREAL)            }         #endif      }/* *    See if it is possible to avoid copying B */      #if defined(NoTransB_) && defined(TREAL)         if (!BC_aliased && ldb == NB && K == NB && alpha == 1.0)            pB = (TYPE *) B;         else         {      #endif         vB = malloc(ATL_Cachelen + ATL_MulBySize(N*K));         ATL_assert(vB);         pB = ATL_AlignPtr(vB);         if ( SCALAR_IS_ONE(alpha) )              Mjoin(B2BLK,2_a1)(B_M_, B_N_, B, ldb, pB, alpha);         else Mjoin(B2BLK,2_aX)(B_M_, B_N_, B, ldb, pB, alpha);      #if defined(NoTransB_) && defined(TREAL)         }      #endif      #ifdef TREAL         Mjoin(PATL,mmIJK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, A, lda, pA,                              incA, mat2blk, pB, beta, C, ldc, C, NULL, NBmm0);      #else         Mjoin(PATL,mmIJK2)(K, nMb, nNb, nKb, ib, jb, kb, alpha, A, lda, pA,                            incA, mat2blk, pB, beta, C, ldc, gescal, NBmm0);      #endif   }#endif   if (vA) free(vA);   if (vB) free(vB);}#undef tatb#undef TATB#undef ETA#undef TA_#undef ta_#undef incA#undef nA_#undef A2BLK#undef A_M_#undef A_N_#undef incA#undef ETB#undef TB_#undef tb_#undef incB#undef nB_#undef B2BLK#undef B_M_#undef B_N_

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -