📄 sba_lapack.c
字号:
}/* * This function computes the inverse of a square matrix whose upper triangle * is stored in A into its lower triangle using LU decomposition * * The function returns 0 in case of error (e.g. A is singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_LU(double *A, int m){static double *buf=NULL;static int buf_sz=0, nb=0;int a_sz, ipiv_sz, work_sz, tot_sz;register int i, j;int info, *ipiv;double *a, *work; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ ipiv_sz=m; a_sz=m*m; if(!nb){#ifndef SBA_LS_SCARCE_MEMORY double tmp; work_sz=-1; // workspace query; optimal size is returned in tmp F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb#else nb=1; // min worksize is m#endif /* SBA_LS_SCARCE_MEMORY */ } work_sz=nb*m; tot_sz=ipiv_sz*sizeof(int) + (a_sz + work_sz)*sizeof(double); if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_symat_invert_LU() failed!\n"); exit(1); } } ipiv=(int *)buf; a=(double *)(ipiv + ipiv_sz); work=a+a_sz; /* store A (column major!) into a */ for(i=0; i<m; ++i) for(j=i; j<m; ++j) a[i+j*m]=a[j+i*m]=A[i*m+j]; // copy A's upper part to a's upper & lower /* LU decomposition for A */ F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dgetrf illegal in sba_symat_invert_LU()\n", -info); exit(1); } else{ fprintf(stderr, "singular matrix A for dgetrf in sba_symat_invert_LU()\n"); return 0; } } /* (A)^{-1} from LU */ F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dgetri illegal in sba_symat_invert_LU()\n", -info); exit(1); } else{ fprintf(stderr, "singular matrix A for dgetri in sba_symat_invert_LU()\n"); return 0; } } /* store (A)^{-1} in A's lower triangle */ for(i=0; i<m; ++i) for(j=0; j<=i; ++j) A[i*m+j]=a[i+j*m]; return 1;}/* * This function computes the inverse of a square symmetric positive definite * matrix whose upper triangle is stored in A into its lower triangle using * Cholesky factorization * * The function returns 0 in case of error (e.g. A is not positive definite or singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_Chol(double *A, int m){static double *buf=NULL;static int buf_sz=0;int a_sz, tot_sz;register int i, j;int info;double *a; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=m*m; tot_sz=a_sz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation in sba_symat_invert_Chol() failed!\n"); exit(1); } } a=(double *)buf; /* store A into a; A is assumed symmetric, hence no transposition is needed */ for(i=0, j=a_sz; i<j; ++i) a[i]=A[i]; /* Cholesky factorization for A; a's lower part corresponds to A's upper */ //F77_FUNC(dpotrf)("L", (int *)&m, a, (int *)&m, (int *)&info); F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_symat_invert_Chol()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for dpotrf in sba_symat_invert_Chol()\n", info); return 0; } } /* (A)^{-1} from Cholesky */ F77_FUNC(dpotri)("L", (int *)&m, a, (int *)&m, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dpotri illegal in sba_symat_invert_Chol()\n", -info); exit(1); } else{ fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in sba_symat_invert_Chol()\n", info, info); return 0; } } /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ for(i=0; i<m; ++i) for(j=0; j<=i; ++j) A[i*m+j]=a[i+j*m]; return 1;}/* * This function computes the inverse of a symmetric indefinite * matrix whose upper triangle is stored in A into its lower triangle * using LDLT factorization with the pivoting strategy of Bunch and Kaufman * * The function returns 0 in case of error (e.g. A is singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_BK(double *A, int m){static double *buf=NULL;static int buf_sz=0, nb=0;int a_sz, ipiv_sz, work_sz, tot_sz;register int i, j;int info, *ipiv;double *a, *work; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ ipiv_sz=m; a_sz=m*m; if(!nb){#ifndef SBA_LS_SCARCE_MEMORY double tmp; work_sz=-1; // workspace query; optimal size is returned in tmp F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb#else nb=-1; // min worksize is 1#endif /* SBA_LS_SCARCE_MEMORY */ } work_sz=(nb!=-1)? nb*m : 1; work_sz=(work_sz>=m)? work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */ tot_sz=ipiv_sz*sizeof(int) + (a_sz + work_sz)*sizeof(double); if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_symat_invert_BK() failed!\n"); exit(1); } } ipiv=(int *)buf; a=(double *)(ipiv + ipiv_sz); work=a+a_sz; /* store A into a; A is assumed symmetric, hence no transposition is needed */ for(i=0, j=a_sz; i<j; ++i) a[i]=A[i]; /* LDLT factorization for A; a's lower part corresponds to A's upper */ F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dsytrf illegal in sba_symat_invert_BK()\n", -info); exit(1); } else{ fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_symat_invert_BK() [D(%d, %d) is zero]\n", info, info); return 0; } } /* (A)^{-1} from LDLT */ F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dsytri illegal in sba_symat_invert_BK()\n", -info); exit(1); } else{ fprintf(stderr, "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_symat_invert_BK()\n", info, info); return 0; } } /* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */ for(i=0; i<m; ++i) for(j=0; j<=i; ++j) A[i*m+j]=a[i+j*m]; return 1;}#define __CG_LINALG_BLOCKSIZE 8/* Dot product of two vectors x and y using loop unrolling and blocking. * see http://www.abarnett.demon.co.uk/tutorial.html */inline static double dprod(const int n, const double *const x, const double *const y){register int i, j1, j2, j3, j4, j5, j6, j7; int blockn;register double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0, sum4=0.0, sum5=0.0, sum6=0.0, sum7=0.0; /* n may not be divisible by __CG_LINALG_BLOCKSIZE, * go as near as we can first, then tidy up. */ blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){ sum0+=x[i]*y[i]; j1=i+1; sum1+=x[j1]*y[j1]; j2=i+2; sum2+=x[j2]*y[j2]; j3=i+3; sum3+=x[j3]*y[j3]; j4=i+4; sum4+=x[j4]*y[j4]; j5=i+5; sum5+=x[j5]*y[j5]; j6=i+6; sum6+=x[j6]*y[j6]; j7=i+7; sum7+=x[j7]*y[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ if(i<n){ /* Jump into the case at the place that will allow * us to finish off the appropriate number of items. */ switch(n - i){ case 7 : sum0+=x[i]*y[i]; ++i; case 6 : sum1+=x[i]*y[i]; ++i; case 5 : sum2+=x[i]*y[i]; ++i; case 4 : sum3+=x[i]*y[i]; ++i; case 3 : sum4+=x[i]*y[i]; ++i; case 2 : sum5+=x[i]*y[i]; ++i; case 1 : sum6+=x[i]*y[i]; ++i; } } return sum0+sum1+sum2+sum3+sum4+sum5+sum6+sum7;}/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y. * Similarly to the dot product routine, this one uses loop unrolling and blocking */inline static void daxpy(const int n, double *const z, const double *const x, const double a, const double *const y){ register int i, j1, j2, j3, j4, j5, j6, j7; int blockn; /* n may not be divisible by __CG_LINALG_BLOCKSIZE, * go as near as we can first, then tidy up. */ blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE; /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */ for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){ z[i]=x[i]+a*y[i]; j1=i+1; z[j1]=x[j1]+a*y[j1]; j2=i+2; z[j2]=x[j2]+a*y[j2]; j3=i+3; z[j3]=x[j3]+a*y[j3]; j4=i+4; z[j4]=x[j4]+a*y[j4]; j5=i+5; z[j5]=x[j5]+a*y[j5]; j6=i+6; z[j6]=x[j6]+a*y[j6]; j7=i+7; z[j7]=x[j7]+a*y[j7]; } /* * There may be some left to do. * This could be done as a simple for() loop, * but a switch is faster (and more interesting) */ if(i<n){ /* Jump into the case at the place that will allow * us to finish off the appropriate number of items. */ switch(n - i){ case 7 : z[i]=x[i]+a*y[i]; ++i; case 6 : z[i]=x[i]+a*y[i]; ++i; case 5 : z[i]=x[i]+a*y[i]; ++i; case 4 : z[i]=x[i]+a*y[i]; ++i; case 3 : z[i]=x[i]+a*y[i]; ++i; case 2 : z[i]=x[i]+a*y[i]; ++i; case 1 : z[i]=x[i]+a*y[i]; ++i; } } }/* * This function returns the solution of Ax = b where A is posititive definite, * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51 * * A is mxm, b, x are is mx1. Argument niter specifies the maximum number of * iterations and eps is the desired solution accuracy. niter<0 signals that * x contains a valid initial approximation to the solution; if niter>0 then * the starting point is taken to be zero. Argument prec selects the desired * preconditioning method as follows: * 0: no preconditioning * 1: jacobi (diagonal) preconditioning * 2: SSOR preconditioning * Argument iscolmaj specifies whether A is stored in column or row major order. * * The function returns 0 in case of error, * the number of iterations performed if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj){static double *buf=NULL;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -