⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 atl_cprow2blkt.c

📁 基于Blas CLapck的.用过的人知道是干啥的
💻 C
字号:
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 2003 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include "atlas_pkblas.h"#ifdef Conj_   #if defined(ALPHA1)      #define scalcp(A_, rp_, ip_) \      { \         *(rp_) = *(A_); \         *(ip_) = -((A_)[1]); \      }   #elif defined(ALPHAXI0)      #define scalcp(A_, rp_, ip_) \      { \         *(rp_) = ralpha * *(A_); \         *(ip_) = calpha * (A_)[1]; \      }   #elif defined(ALPHAX)      #define scalcp(A_, rp_, ip_) \      { \         ra = *(A_); ia = (A_)[1]; \         *(rp_) = ralpha * ra + ialpha * ia; \         *(ip_) = ialpha * ra - ralpha * ia; \      }   #endif#else   #if defined(ALPHA1)      #define scalcp(A_, rp_, ip_) \      { \         *(rp_) = *(A_); \         *(ip_) = (A_)[1]; \      }   #elif defined(ALPHAXI0)      #define scalcp(A_, rp_, ip_) \      { \         *(rp_) = ralpha * *(A_); \         *(ip_) = ralpha * (A_)[1]; \      }   #elif defined(ALPHAX)      #define scalcp(A_, rp_, ip_) \      { \         ra = *(A_); ia = (A_)[1]; \         *(rp_) = ralpha * ra - ialpha * ia; \         *(ip_) = ialpha * ra + ralpha * ia; \      }   #endif#endif#ifdef Conj_   #define prow2blkT Mjoin(Mjoin(PATL,prow2blkH),NM)   #define prow2blkTF Mjoin(PATL,prow2blkHF)   #define prow2blk_KB Mjoin(Mjoin(PATL,prow2blkH_KB),NM)#else   #define prow2blkT Mjoin(Mjoin(PATL,prow2blkT),NM)   #define prow2blkTF Mjoin(PATL,prow2blkTF)   #define prow2blk_KB Mjoin(Mjoin(PATL,prow2blkT_KB),NM)#endifvoid prow2blk_KB(const int mb, const int nb, const SCALAR alpha, const TYPE *A,                 int lda, const int ldainc, TYPE *V)/* * This routine used by full copy to copy one mbxnb block of a matrix A to * block-major nbxmb storage (A is transposed during the copy) */{   TYPE *v;   const int mn = mb * nb, ldainc2 = ldainc+ldainc;   int i, j;   #ifdef ALPHAXI0      #ifdef Conj_         const register TYPE ralpha = *alpha, calpha = -ralpha;      #else         const register TYPE ralpha = *alpha;      #endif   #elif defined(ALPHAX)      register const TYPE ralpha=(*alpha), ialpha = alpha[1];      register TYPE ra, ia;   #endif   if (ldainc == -1) lda--;   lda -= mb;   lda += lda;   for (j=nb; j; j--)   {      v = V++;      for (i=0; i != mb; i++, v += nb, A += 2) scalcp(A, v+mn, v);      A += lda;      lda += ldainc2;   }}void Mjoin(prow2blkT,_blk)(const int blk, const int M, const int N,                           const SCALAR alpha, const TYPE *A, int lda,                           const int ldainc, TYPE *V)/* * Given a packed Upper matrix A, copies & transposes M rows starting at A into * block-major row panel *    ldainc =  0 : General rectangular *    ldainc =  1 : Upper *    ldainc = -1 : Lower */{   const int kb = Mmin(blk,N);   const int ncb = N / kb, nr = N - ncb*kb;   const int incV = kb*M - kb;   const int VN = kb*M, vn = nr*M;   int jb, i, j;   TYPE *v;   #ifdef ALPHAXI0      #ifdef Conj_         const register TYPE ralpha = *alpha, calpha = -ralpha;      #else         const register TYPE ralpha = *alpha;      #endif   #elif defined(ALPHAX)      register const TYPE ralpha=(*alpha), ialpha = alpha[1];      register TYPE ra, ia;   #endif   if (ldainc == -1) lda--;   lda -= M;   lda += lda;   for (jb=ncb; jb; jb--)   {      for (j=kb; j; j--)      {         v = V++;         for (i=0; i != M; i++, v += kb, A += 2) scalcp(A, v+VN, v);         A += lda;         lda += ldainc;      }      V += incV;   }   for (j=nr; j; j--)   {      v = V++;      for (i=0; i != M; i++, v += nr, A += 2) scalcp(A, v+vn, v);      A += lda;      lda += ldainc;   }}void prow2blkT(const int M, const int N, const SCALAR alpha, const TYPE *A,               int lda, const int ldainc, TYPE *V){   if (ldainc) Mjoin(prow2blkT,_blk)(NB, M, N, alpha, A, lda, ldainc, V);#ifdef Conj_   else Mjoin(Mjoin(PATL,row2blkC),NM)(N, M, A, lda, V, alpha);#else   else Mjoin(Mjoin(PATL,row2blkT),NM)(N, M, A, lda, V, alpha);#endif}#ifdef ALPHA1#ifdef Conj_   #define dr2bT2_a1   Mjoin(PATL,row2blkC2_a1)   #define dr2bT2_aXi0 Mjoin(PATL,row2blkC2_aXi0)   #define dr2bT2_aX   Mjoin(PATL,row2blkC2_aX)   #define ATL_row2blk_KB_a1   Mjoin(Mjoin(PATL,prow2blkH_KB),_a1)   #define ATL_row2blk_KB_aXi0 Mjoin(Mjoin(PATL,prow2blkH_KB),_aXi0)   #define ATL_row2blk_KB_aX   Mjoin(Mjoin(PATL,prow2blkH_KB),_aX)#else   #define dr2bT2_a1   Mjoin(PATL,row2blkT2_a1)   #define dr2bT2_aXi0 Mjoin(PATL,row2blkT2_aXi0)   #define dr2bT2_aX   Mjoin(PATL,row2blkT2_aX)   #define ATL_row2blk_KB_a1   Mjoin(Mjoin(PATL,prow2blkT_KB),_a1)   #define ATL_row2blk_KB_aXi0 Mjoin(Mjoin(PATL,prow2blkT_KB),_aXi0)   #define ATL_row2blk_KB_aX   Mjoin(Mjoin(PATL,prow2blkT_KB),_aX)#endifvoid Mjoin(prow2blkTF,_blk)   (const int blk, const int M, const int N, const SCALAR alpha,    const TYPE *A, int lda, const int ldainc, TYPE *V){   const int mb = Mmin(blk,M), nMb = M/blk;   const int m = blk*nMb, n = blk*(N/blk);   const int nr = N - n, mr = M - m;   const int incVm = blk*(N+N), incVV = blk*(mr+mr), nbnb=(blk*blk)<<1;   int i, j, ib, jb;   const enum PACK_UPLO UA = (ldainc == 1) ? PackUpper :      ( (ldainc == -1) ? PackLower : PackGen );   TYPE *v, *vv = V+nMb*incVm;   void (*row2blk)(const int M, const int N, const SCALAR alpha, const TYPE *A,                   int lda, const int ldainc, TYPE *V);   if (alpha[1] == ATL_rzero)   {      if (*alpha == ATL_rone) row2blk = ATL_row2blk_KB_a1;      else row2blk = ATL_row2blk_KB_aXi0;   }   else row2blk = ATL_row2blk_KB_aX;   for (j=0; j < n; j += blk)   {      for (v=V, i=0; i < m; i += blk, v += incVm)         row2blk(blk, blk, alpha, A+MindexP(UA,i,j,lda), Mpld(UA,j,lda),                 ldainc, v);      if (mr)      {         row2blk(mr, blk, alpha, A+MindexP(UA,m,j,lda), Mpld(UA,j,lda),                 ldainc, vv);         vv += incVV;      }      V += nbnb;   }   if (nr)   {      for (v=V, i=0; i < m; i += blk, v += incVm)         row2blk(blk, nr, alpha, A+MindexP(UA,i,n,lda), Mpld(UA,n,lda),                 ldainc, v);      if (mr)         row2blk(mr, nr, alpha, A+MindexP(UA,m,n,lda), Mpld(UA,n,lda),                 ldainc, vv);   }}void prow2blkTF(const int M, const int N, const SCALAR alpha, const TYPE *A,                int lda, const int ldainc, TYPE *V){   if (ldainc) Mjoin(prow2blkTF,_blk)(NB, M, N, alpha, A, lda, ldainc, V);   else if (alpha[1] == ATL_rzero)   {      if (*alpha == ATL_rone) dr2bT2_a1(M, N, A, lda, V, alpha);      else dr2bT2_aXi0(M, N, A, lda, V, alpha);   }   else dr2bT2_aX(M, N, A, lda, V, alpha);}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -