invtrsm.c

来自「基于Blas CLapck的.用过的人知道是干啥的」· C语言 代码 · 共 718 行 · 第 1/2 页

C
718
字号
/* *             Automatically Tuned Linear Algebra Software v3.8.0 *                    (C) Copyright 2001 R. Clint Whaley * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: *   1. Redistributions of source code must retain the above copyright *      notice, this list of conditions and the following disclaimer. *   2. Redistributions in binary form must reproduce the above copyright *      notice, this list of conditions, and the following disclaimer in the *      documentation and/or other materials provided with the distribution. *   3. The name of the ATLAS group or the names of its contributers may *      not be used to endorse or promote products derived from this *      software without specific written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */#include "atlas_misc.h"#include "atlas_lapack.h"#include "cblas.h"#include "atlas_cblastypealias.h"#include "atlas_tst.h"#ifdef TimeC   #include "clapack.h"   #define CLP Mjoin(clapack_,PRE)#endif#ifdef GCCWIN   ___main(){} __main(){} MAIN__(){} _MAIN_(){}#endif#define CBP Mjoin(cblas_,PRE)double time00();enum TEST_UPLO {TestGE=0, TestUpper=121, TestLower=122};static void geinv   (const enum CBLAS_ORDER Order, const int N, TYPE *A, const int lda){   int *ipiv;   TYPE *wrk;   int lwrk;   ipiv = malloc(sizeof(int)*N);   ATL_assert(ipiv);   #ifdef TimeF77      lwrk = N * Mjoin(PATL,GetNB)();      wrk = malloc(ATL_MulBySize(lwrk));      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);      ATL_assert(Mjoin(PATL,f77getrf)(AtlasColMajor, N, N, A, lda, ipiv) == 0);      ATL_assert(Mjoin(PATL,f77getri)         (AtlasColMajor, N, A, lda, ipiv, wrk, &lwrk) == 0);      if (Order == AtlasRowMajor) Mjoin(PATL,tstsqtran)(N, A, lda);      free(wrk);   #elif defined(TimeC)      ATL_assert(Mjoin(CLP,getrf)(Order, N, N, A, lda, ipiv) == 0);      ATL_assert(Mjoin(CLP,getri)(Order, N, A, lda, ipiv) == 0);  #else      lwrk = N * Mjoin(PATL,GetNB)();      wrk = malloc(ATL_MulBySize(lwrk));      ATL_assert(Mjoin(PATL,getrf)(Order, N, N, A, lda, ipiv) == 0);      ATL_assert(Mjoin(PATL,getri)(Order, N, A, lda, ipiv, wrk, &lwrk) == 0);      free(wrk);   #endif   free(ipiv);}static void tsthetran(const int N, TYPE *A, const int lda){   int i;   const int lda2=lda SHIFT;   Mjoin(PATL,tstsqtran)(N, A, lda);#ifdef TCPLX   for (i=0; i < N; i++) Mjoin(PATLU,scal)(N, ATL_rnone, A+1+i*lda2, 2);#endif}static void poinv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,                  const int N, TYPE *A, const int lda){   #ifdef TimeF77      if (Order == AtlasRowMajor) tsthetran(N, A, lda);      ATL_assert(Mjoin(PATL,f77potrf)(Uplo, N, A, lda) == 0);      ATL_assert(Mjoin(PATL,f77trtri)(Uplo, CblasNonUnit, N, A, lda) == 0);      ATL_assert(Mjoin(PATL,f77lauum)(Uplo, N, A, lda) == 0);      if (Order == AtlasRowMajor) tsthetran(N, A, lda);   #elif defined(TimeC)      ATL_assert(Mjoin(CLP,potrf)(Order, Uplo, N, A, lda) == 0);      ATL_assert(Mjoin(CLP,trtri)(Order, Uplo, CblasNonUnit, N, A, lda) == 0);      ATL_assert(Mjoin(CLP,lauum)(Order, Uplo, N, A, lda) == 0);   #else      ATL_assert(Mjoin(PATL,potrf)(Order, Uplo, N, A, lda) == 0);      ATL_assert(Mjoin(PATL,trtri)(Order, Uplo, CblasNonUnit, N, A, lda) == 0);      Mjoin(PATL,lauum)(Order, Uplo, N, A, lda);   #endif}static void test_inv(const enum CBLAS_ORDER Order, const enum TEST_UPLO Uplo,                     const int N, TYPE *A, const int lda){   if (Uplo == TestGE) geinv(Order, N, A, lda);   else poinv(Order, (enum CBLAS_ORDER) Uplo, N, A, lda);}TYPE *GetGE(int M, int N, int lda){   TYPE *A;   A = malloc(ATL_MulBySize(lda)*N);   if (A) Mjoin(PATL,gegen)(M, N, A, lda, M*N+lda);   return(A);}static void CrapUpTri   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)/* * Puts crap on opposite triangle to Uplo, so as to ensure error on use */{   const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT);   int j;   if (Order == CblasRowMajor)   {      if (Uplo == CblasLower) Uplo = CblasUpper;      else Uplo = CblasLower;   }   if (Uplo == CblasLower)   {      A += lda2;      for (j=1; j < N; j++, A += lda2)         Mjoin(PATLU,set)(j SHIFT, -50000000.0, A, 1);   }   else   {      for (j=0; j < N; j++, A += ldap1)         Mjoin(PATLU,set)((N-j-1)SHIFT, -5500000.0, A+(1 SHIFT), 1);   }}static TYPE *DupMat(enum ATLAS_ORDER Order, int M, int N, TYPE *A, int lda,                    int ldc)/* * returns a duplicate of the A matrix, with new leading dimension */{   int i, j, M2;   const int ldc2 = (ldc SHIFT), lda2 = (lda SHIFT);   TYPE *C;   if (Order == CblasRowMajor)   {      i = M;      M = N;      N = i;   }   M2 = M SHIFT;   ATL_assert(ldc >= M);   C = malloc(ATL_MulBySize(ldc)*N);   ATL_assert(C);   for (j=0; j != N; j++)   {      for (i=0; i != M2; i++) C[i] = A[i];      C += ldc2;      A += lda2;   }   return(C-N*ldc2);}#include <math.h>static void PosDefGen   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)/* * Generates a reasonably conditioned positive definite matrix */{   TYPE *aa, t0, *L;   TYPE val, bias, sign;   int j;   const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT);   Mjoin(PATL,gegen)(N, N, A, lda, N*N+lda);   if (Order == CblasRowMajor)   {      if (Uplo == CblasLower) Uplo = CblasUpper;      else Uplo = CblasLower;   }/* * It should be enough to make diagonal non-zero, but small numbers are very * ill-conditioned, and therefore may not be solvable in practice.  Therefore, * scale the diagonal by log(N). */   bias = log(N);   bias = (bias < 1.0) ? 1.0 : bias;   for (aa=A,j=0; j < N; j++, aa += ldap1)   {      val = *aa;      sign = (val < 0.0) ? -1.0 : 1.0;      val = (val < 0.0) ? -val : val;      val = (val+bias)*sign;      *aa = val;   }/* * For imaginary numbers, force zero imaginary component on diagonal */   #ifdef TCPLX      Mjoin(Mjoin(ATL_,UPR),set)(N, 0.0, A+1, ldap1);   #endif/* * Zero non-active portion of matrix */   if (Uplo == CblasLower)   {      for (j=0, aa=A; j < N; j++, aa += lda2)         Mjoin(PATL,zero)(j, aa, 1);   }   else   {      for (j=0, aa=A+(1 SHIFT); j < N; j++, aa += ldap1)         Mjoin(PATL,zero)(N-j-1, aa, 1);   }/* * Force A = L * L', where L is Lower or Upper as requested, to make pos def */   L = DupMat(CblasColMajor, N, N, A, lda, N);   #ifdef TCPLX      Mjoin(CBP,herk)(CblasColMajor, Uplo, CblasNoTrans, N, N, ATL_rone, L, N,                      ATL_rzero, A, lda);   #else      Mjoin(CBP,syrk)(CblasColMajor, Uplo, CblasNoTrans, N, N, ATL_rone, L, N,                      ATL_rzero, A, lda);#endif   free(L);/* * Make sure non-triangular elements are bad for error detection */   CrapUpTri(CblasColMajor, Uplo, N, A, lda);}static void MakeHEDiagDom   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)/* * Makes hermitian matrix diagonally dominant */{   TYPE *aa, t0;   int j;   const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT);   if (Order == CblasRowMajor)   {      if (Uplo == CblasLower) Uplo = CblasUpper;      else Uplo = CblasLower;   }   if (Uplo == CblasLower)   {      for (j=0; j < N; j++, A += ldap1)      {         #ifdef TREAL            *A = 1.0 + cblas_asum(N-j, A, 1);            *A += cblas_asum(j, A-lda*j, lda);         #elif defined(SCPLX)            *A = 1.0 + cblas_scasum(N-j, A, 1);            *A += cblas_scasum(j, A-lda2*j, lda);         #else            *A = 1.0 + cblas_dzasum(N-j, A, 1);            *A += cblas_dzasum(j, A-lda2*j, lda);         #endif         #ifdef TCPLX            A[1] = ATL_rzero;         #endif      }   }   else /* Upper */   {      for (j=0; j < N; j++, A += ldap1)      {         #ifdef TREAL            *A = 1.0 + cblas_asum(N-j, A, lda);            *A += cblas_asum(j, A-j, 1);         #else            #ifdef SCPLX               *A = 1.0 + cblas_scasum(N-j, A, lda);               *A += cblas_scasum(j, A-j*2, 1);            #else               *A = 1.0 + cblas_dzasum(N-j, A, lda);               *A += cblas_dzasum(j, A-j*2, 1);            #endif            A[1] = ATL_rzero;         #endif      }   }}static void hegen   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda){#ifdef POSDEFGEN   PosDefGen(Order, Uplo, N, A, lda);#else   MakeHEDiagDom(Order, Uplo, N, A, lda);   CrapUpTri(Order, Uplo, N, A, lda);#endif}static TYPE *GetHE(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, int lda)/* * Gets symm/hemm matrix, and puts a bunch of crap in other side to make * sure factorization doesn't use it, and makes pos def by making it * diag dominant */{   TYPE *A;   A = GetGE(N, N, lda);   if (!A) return(NULL);   hegen(Order, Uplo, N, A, lda);   return(A);}static void ReflectMat   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)/* * Takes a symmetric matrix, and makes it general by reflecting across diagonal */{   const int lda2 = (lda SHIFT);   int j;   if (Order == CblasRowMajor)   {      if (Uplo == CblasUpper) Uplo = CblasLower;      else Uplo = CblasUpper;   }   if (Uplo == CblasUpper)   {      for (j=0; j < N; j++) cblas_copy(j, A+j*lda2, 1, A+(j SHIFT), lda);   }   else /* lower matrix */   {      for (j=0; j < N; j++) cblas_copy(j, A+(j SHIFT), lda,  A+j*lda2, 1);   }}static void ReflectHE   (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda)/* * Reflects matrix, makes it hermitian */{

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?