⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 atl_mmbpp.c

📁 基于 BLAS 和 CLAPACK 的代码,用过的人知道是干啥的
💻 C
字号:
/*
 *             Automatically Tuned Linear Algebra Software v3.8.0
 *                    (C) Copyright 2007 R. Clint Whaley
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions, and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *   3. The name of the ATLAS group or the names of its contributers may
 *      not be used to endorse or promote products derived from this
 *      software without specific written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include "atlas_misc.h"
#include "atlas_lvl3.h"
#include <stdlib.h>

/*
 * Copy algorithm, assuming M <= MB && N <= NB, K large (shape: block, panel,
 * panel); copies A and B on-the-fly.
 *
 * The product is accumulated by Mjoin(PATL,mmK) into a malloc'ed workspace
 * aligned with ATL_AlignPtr; that workspace is then combined with the
 * caller's C by Mjoin(PATL,geadd) using alpha and beta.
 * NOTE(review): presumably geadd computes C <- alpha*work + beta*C; confirm
 * against the geadd definition, which is not visible in this file.
 *
 * TA, TB  : transpose settings; each selects the copy routine (gemove or
 *           gemoveT) and the increment (incA / incB) handed to mmK for the
 *           corresponding input matrix.
 * M, N, K : problem dimensions.  M > MB or N > NB is rejected.
 * alpha   : scalar handed to geadd together with the workspace result.
 * A, lda  : input matrix A and its leading dimension.
 * B, ldb  : input matrix B and its leading dimension.
 * beta    : scalar handed to geadd together with C.
 * C, ldc0 : output matrix and its leading dimension.
 *
 * RETURNS:  0 on success,
 *           1 if M > MB or N > NB (this routine does not handle that case),
 *          -1 if the workspace could not be allocated.
 */
int Mjoin(PATL,mmBPP)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,
                      const int M, const int N, const int K,
                      const SCALAR alpha, const TYPE *A, const int lda,
                      const TYPE *B, const int ldb, const SCALAR beta,
                      TYPE *C, const int ldc0)
{
   void *vC;
   TYPE *pA, *pB, *pC;
   void (*A2blk)(int N, int M, const SCALAR alpha, const TYPE *A, int lda,
                 TYPE *C, int ldc);
   void (*B2blk)(int N, int M, const SCALAR alpha, const TYPE *A, int lda,
                 TYPE *C, int ldc);
   NBMM0 NBmm0, NBmm1;
   int ldc, m, n, nblk, kr, incA, incB;

   if (M > MB || N > NB)  /* don't handle multiple M/N blocks */
      return(1);
/*
 * When M (N) falls short of a full block by no more than ATL_mmMU (ATL_mmNU),
 * operate on a full MB (NB) extent instead; the extra workspace is zeroed
 * below so the padding multiplies zeros.
 */
   m = (M < MB && M+ATL_mmMU >= MB) ? MB : M;
   n = (N < NB && N+ATL_mmNU >= NB) ? NB : N;
/*
 * Leading dimension of the workspace C: the byte length of one m-element
 * column rounded up to a multiple of ATL_Cachelen, expressed in elements
 */
   ldc = (((m*sizeof(TYPE)+ATL_Cachelen-1)/ATL_Cachelen)*ATL_Cachelen)
          / sizeof(TYPE);
/*
 * One allocation holds, in order: workspace C (ldc*n elements), the copy
 * buffer for A (KB*m elements) and the copy buffer for B (KB*n elements);
 * the extra ATL_Cachelen bytes leave room for ATL_AlignPtr to align pC
 */
   vC = malloc(ATL_Cachelen+ATL_MulBySize(ldc*n+KB*(m+n)));
   if (!vC) return(-1);
   pC = ATL_AlignPtr(vC);
   pA = pC + ldc*n;
   pB = pA + KB*m;
/*
 * Choose copy routine and per-block increment for A and B from TA and TB
 */
   if (TA == AtlasNoTrans)
   {
      A2blk = Mjoin(PATL,gemoveT);
      incA = lda*KB;
   }
   else
   {
      A2blk = Mjoin(PATL,gemove);
      incA = KB;
   }
   if (TB == AtlasNoTrans)
   {
      B2blk = Mjoin(PATL,gemove);
      incB = KB;
   }
   else
   {
      B2blk = Mjoin(PATL,gemoveT);
      incB = ldb*KB;
   }
/*
 * If we are going to multiply zeros to avoid cleanup, zero workspace
 * (C, A and B buffers are contiguous, so one call covers all three)
 */
   if (m != M || n != N)
      Mjoin(PATL,zero)(ldc*n+KB*(m+n), pC, 1);
/*
 * See what kernel we're calling
 */
   if (m == MB)
   {
      if (n == NB)  /* no cleanup */
      {
         NBmm0 = NBmm_b0;
         NBmm1 = NBmm_b1;
      }
      else         /* need to call N-cleanup kernel */
      {
         NBmm0 = Mjoin(PATL,pNBmm_b0);
         NBmm1 = Mjoin(PATL,pNBmm_b1);
      }
   }
   else if (n == NB) /* call M-cleanup kernel */
   {
      NBmm0 = Mjoin(PATL,pMBmm_b0);
      NBmm1 = Mjoin(PATL,pMBmm_b1);
   }
   else              /* both N & M are cleanup, call general K clean */
   {
      NBmm0 = Mjoin(PATL,pKBmm);
      NBmm1 = Mjoin(PATL,pKBmm);
      if (m == M && n == N)  /* must zero pC if not done above */
         Mjoin(PATL,zero)(ldc*n, pC, 1);
   }
/*
 * Split K into nblk full KB-sized blocks plus a remainder of kr
 */
   nblk = K / KB;
   kr = K - nblk*KB;
/*
 * With no full K block but a nonzero remainder, workspace C is zeroed
 * before mmK runs
 */
   if (!nblk && kr)
      Mjoin(PATL,zero)(ldc*n, pC, 1);
/*
 * The 7th argument is KB when the remainder is within 4 of a full block,
 * 0 otherwise.
 * NOTE(review): presumably this asks mmK to treat the remainder as a padded
 * full block; confirm against the mmK definition.
 */
   Mjoin(PATL,mmK)(M, m, N, n, nblk, kr, (kr && kr+4 >= KB) ? KB : 0,
                   ATL_rone, ATL_rone, ATL_rzero, A, lda, incA, pA, 0,
                   B, ldb, incB, pB, 0, pC, ldc, A2blk, B2blk, NBmm0, NBmm1);
/*
 * Combine the M x N part of the workspace with the caller's C
 */
   Mjoin(PATL,geadd)(M, N, alpha, pC, ldc, beta, C, ldc0);
   free(vC);
   return(0);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -