⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 atl_row2blkt.c

📁 基于 BLAS、CLAPACK 的代码。用过的人知道是干啥的
💻 C
字号:
/*
 *             Automatically Tuned Linear Algebra Software v3.8.0
 *                    (C) Copyright 1997 R. Clint Whaley
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the ATLAS group or the names of its contributers may
 *      not be used to endorse or promote products derived from this
 *      software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include "atlas_misc.h"
#include "atlas_lvl3.h"
#include "atlas_prefetch.h"

/*
 * Build the externally visible / file-local routine names by pasting the
 * PATL prefix and the NM suffix around each base name.
 */
#define ATL_row2blkT_NB Mjoin(Mjoin(PATL,row2blkT_NB),NM)
#define ATL_row2blkT_KB Mjoin(Mjoin(PATL,row2blkT_KB),NM)
#define ATL_row2blkT Mjoin(Mjoin(PATL,row2blkT),NM)
#define ATL_row2blkT2 Mjoin(Mjoin(PATL,row2blkT2),NM)

/*
 * The trailing "&& 0" makes this condition always false, so the external
 * prototype is never selected and the static C kernel below is always used.
 */
#if defined (DREAL) && defined(ATL_GAS_x8664) && 0
void ATL_row2blkT_NB(const int M, const int N, const TYPE *A, const int lda,
                     TYPE *V, const TYPE alpha);
#else
static void ATL_row2blkT_NB
   (const int M, const int N, const TYPE *A, const int lda, TYPE *V,
    const TYPE alpha0)
/*
 * Copy where M & N are NB, compiler should be able to completely unroll.
 *
 * Writes V[i*NB + j] = ATL_MulByALPHA(A[j*lda + i]) for 0 <= i,j < NB,
 * handling two consecutive rows of A (two values of j) per outer iteration.
 * M and N are not referenced in this kernel; all loop bounds are the
 * compile-time constant NB.
 *
 * NOTE(review): the local must stay named `alpha` -- ATL_MulByALPHA is a
 * macro defined elsewhere and presumably refers to that identifier; confirm
 * against atlas_lvl3.h.
 */
{
   const int lda2 = lda<<1;               /* stride between row pairs of A */
   int i, j;
   TYPE *v=V;
   const TYPE *pA0 = A, *pA1 = A + lda;   /* the two rows currently copied */
   const register TYPE alpha=alpha0;
   #ifdef ATL_AltiVec
      /*
       * The prefetch control word is computed on the first call only and
       * cached in a function-level static for all later calls.
       */
      static int cwrd=0;
      if (cwrd) goto L1;
      i = 1; /* one block unless NB is too big */
      j = ATL_MulBySize(NB)>>4;
      while (j > 32) { j >>= 1; i <<= 1; }
      if (j == 32) j = 0;
      cwrd = ATL_GetCtrl(j<<4, i, j);
L1:
      ATL_pfavR(pA0, cwrd, 2);
      ATL_pfavR(pA1, cwrd, 3);
   #endif
/*
 * This branch is compiled only when NB is odd, in which case the assertion
 * below always fails: the two-rows-at-a-time loop requires an even NB.
 */
#if (NB/2)*2 != NB  /* ATLAS should ensure NB is divisible by two */
   assert((NB/2)*2 == NB);
#endif

   for (j=NB; j; j -= 2)
   {
      #ifdef ATL_AltiVec
         /* issue prefetches for the next pair of rows */
         ATL_pfavR(pA0+lda2, cwrd, 0);
         ATL_pfavR(pA1+lda2, cwrd, 1);
      #endif
      /* element i of the two rows lands in adjacent slots of V's row i */
      for (i=0; i != NB; i++, v += NB)
      {
         *v = ATL_MulByALPHA(pA0[i]);
         v[1] = ATL_MulByALPHA(pA1[i]);
      }
      V += 2;        /* next pair of destination columns */
      v = V;
      pA0 += lda2;
      pA1 += lda2;
   }
}
#endif

static void ATL_row2blkT_KB
   (const int M, const int N, const TYPE *A, const int lda, TYPE *V,
    const TYPE alpha0)
/*
 * General-size version of the copy above.
 *
 * Writes V[i*N + j] = ATL_MulByALPHA(A[j*lda + i]) for 0 <= i < M and
 * 0 <= j < N.  Rows of A are handled two at a time; when N is odd the
 * last row is copied on its own after the main loop.
 *
 * NOTE(review): the local must stay named `alpha` -- ATL_MulByALPHA is a
 * macro defined elsewhere and presumably refers to that identifier.
 */
{
   const int n = N >> 1, lda2 = lda<<1;   /* n = number of full row pairs */
   int i, j;
   TYPE *v=V;
   const TYPE *pA0 = A, *pA1 = A + lda;
   const register TYPE alpha=alpha0;

   for (j=n; j; j--)
   {
      for (i=0; i != M; i++, v += N)
      {
         *v = ATL_MulByALPHA(pA0[i]);
         v[1] = ATL_MulByALPHA(pA1[i]);
      }
      V += 2;
      v = V;
      pA0 += lda2;
      pA1 += lda2;
   }
   /* odd N: pA0 now addresses the one remaining row, v its destination */
   if ((n<<1) != N)
      for (i=0; i != M; i++, v += N) *v = ATL_MulByALPHA(pA0[i]);
}

void ATL_row2blkT(const int N, const int nb, const TYPE *A, const int lda,
                  TYPE *V, const SCALAR alpha)
/*
 * A is a nbxN matrix, v is a N*nb length vector.
 * v receives trans(A) in block major order.
 *
 * A is consumed NB rows (stride lda) at a time; each such panel produces
 * one block of nb*NB elements in V.  A final panel of fewer than NB rows,
 * if any, is copied with the general kernel.
 * NOTE(review): ATL_DivByNB / ATL_MulByNB are taken to divide / multiply
 * by NB, as their use for the remainder computation suggests.
 */
{
   const int Nb = ATL_DivByNB(N), incA = ATL_MulByNB(lda);
   const int incV = ATL_MulByNB(nb);
   int k;

   if (nb == NB)
      for (k=0; k != Nb; k++, A += incA, V += incV)
         /*
          * M is the row count NB, not the block count Nb.  The C kernel
          * ignores M, so this only matters if a kernel that reads it is
          * ever substituted.
          */
         ATL_row2blkT_NB(NB, NB, A, lda, V, alpha);
   else
      for (k=0; k != Nb; k++, A += incA, V += incV)
         ATL_row2blkT_KB(nb, NB, A, lda, V, alpha);

   /* leftover rows beyond the last full NB panel */
   k = N - ATL_MulByNB(Nb);
   if (k)
      ATL_row2blkT_KB(nb, k, A, lda, V, alpha);
}

void ATL_row2blkT2(const int M, const int N, const TYPE *A, const int lda,
                   TYPE *V, const SCALAR alpha)
/*
 * Blocked copy of an operand that is tiled in both dimensions.
 *
 * M is split into Mb chunks of NB contiguous elements plus a remainder mr;
 * N is split into Nb panels of NB rows (stride lda) plus a remainder nr.
 * Full NBxNB tiles go through ATL_row2blkT_NB, partial tiles through
 * ATL_row2blkT_KB.
 *
 * Destination layout, as established by the pointer updates below:
 *   - the tile for chunk i of panel j is written at V + i*NB*N + j*NBNB;
 *   - tiles for the mr remainder are written consecutively starting at
 *     V + Mb*NB*N, each full-panel tile occupying mr*NB elements.
 */
{
   const int Mb = ATL_DivByNB(M), Nb = ATL_DivByNB(N);
   const int mr = M - ATL_MulByNB(Mb), nr = N - ATL_MulByNB(Nb);
   /*
    * The inner loops advance A by Mb*NB == M - mr elements, so incA moves A
    * from there to the start of the next panel of NB rows.
    */
   const int incV = ATL_MulByNB(N), incA = ATL_MulByNB(lda) - M + mr;
   const int incVV = ATL_MulByNB(mr);
   int i, j;
   TYPE *v=V, *vv = V+Mb*incV;   /* vv: destination for the mr remainder */

   for (j=Nb; j; j--)
   {
      for (i=Mb; i; i--, A += NB, v += incV)
         ATL_row2blkT_NB(NB, NB, A, lda, v, alpha);
      if (mr)
      {
         ATL_row2blkT_KB(mr, NB, A, lda, vv, alpha);
         vv += incVV;
      }
      A += incA;
      V += NBNB;
      v = V;
   }
   /* last, partial panel of nr rows */
   if (nr)
   {
      for (i=Mb; i; i--, A += NB, v += incV)
         ATL_row2blkT_KB(NB, nr, A, lda, v, alpha);
      if (mr) ATL_row2blkT_KB(mr, nr, A, lda, vv, alpha);
   }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -