📄 sba_lapack.c
字号:
static int buf_sz=0;register int i, j;register double *aim;int iter, a_sz, res_sz, d_sz, q_sz, s_sz, wk_sz, z_sz, tot_sz;double *a, *res, *d, *q, *s, *wk, *z;double delta0, deltaold, deltanew, alpha, beta, eps_sq=eps*eps;register double sum;int rec_res; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate required memory size */ a_sz=(iscolmaj)? m*m : 0; res_sz=m; d_sz=m; q_sz=m; if(prec!=SBA_CG_NOPREC){ s_sz=m; wk_sz=m; z_sz=(prec==SBA_CG_SSOR)? m : 0; } else s_sz=wk_sz=z_sz=0; tot_sz=a_sz+res_sz+d_sz+q_sz+s_sz+wk_sz+z_sz; if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz*sizeof(double)); if(!buf){ fprintf(stderr, "memory allocation request failed in sba_Axb_CG()\n"); exit(1); } } if(iscolmaj){ a=buf; /* store A (row major!) into a */ for(i=0; i<m; ++i) for(j=0, aim=a+i*m; j<m; ++j) aim[j]=A[i+j*m]; } else a=A; /* no copying required */ res=buf+a_sz; d=res+res_sz; q=d+d_sz; if(prec!=SBA_CG_NOPREC){ s=q+q_sz; wk=s+s_sz; z=(prec==SBA_CG_SSOR)? wk+wk_sz : NULL; for(i=0; i<m; ++i){ // compute jacobi (i.e. diagonal) preconditioners and save them in wk sum=a[i*m+i]; if(sum>DBL_EPSILON || -sum<-DBL_EPSILON) // != 0.0 wk[i]=1.0/sum; else wk[i]=1.0/DBL_EPSILON; } } else{ s=res; wk=z=NULL; } if(niter>0){ for(i=0; i<m; ++i){ // clear solution and initialize residual vector: res <-- B x[i]=0.0; res[i]=B[i]; } } else{ niter=-niter; for(i=0; i<m; ++i){ // initialize residual vector: res <-- B - A*x for(j=0, aim=a+i*m, sum=0.0; j<m; ++j) sum+=aim[j]*x[j]; res[i]=B[i]-sum; } } switch(prec){ case SBA_CG_NOPREC: for(i=0, deltanew=0.0; i<m; ++i){ d[i]=res[i]; deltanew+=res[i]*res[i]; } break; case SBA_CG_JACOBI: // jacobi preconditioning for(i=0, deltanew=0.0; i<m; ++i){ d[i]=res[i]*wk[i]; deltanew+=res[i]*d[i]; } break; case SBA_CG_SSOR: // SSOR preconditioning; see the "templates" book, fig. 3.2, p. 44 for(i=0; i<m; ++i){ for(j=0, sum=0.0, aim=a+i*m; j<i; ++j) sum+=aim[j]*z[j]; z[i]=wk[i]*(res[i]-sum); } for(i=m-1; i>=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j) sum+=aim[j]*d[j]; d[i]=z[i]-wk[i]*sum; } deltanew=dprod(m, res, d); break; default: fprintf(stderr, "unknown preconditioning option %d in sba_Axb_CG\n", prec); exit(1); } delta0=deltanew; for(iter=1; deltanew>eps_sq*delta0 && iter<=niter; ++iter){ for(i=0; i<m; ++i){ // q <-- A d aim=a+i*m;/*** for(j=0, sum=0.0; j<m; ++j) sum+=aim[j]*d[j];***/ q[i]=dprod(m, aim, d); //sum; }/*** for(i=0, sum=0.0; i<m; ++i) sum+=d[i]*q[i];***/ alpha=deltanew/dprod(m, d, q); // deltanew/sum;/*** for(i=0; i<m; ++i) x[i]+=alpha*d[i];***/ daxpy(m, x, x, alpha, d); if(!(iter%50)){ for(i=0; i<m; ++i){ // accurate computation of the residual vector aim=a+i*m;/*** for(j=0, sum=0.0; j<m; ++j) sum+=aim[j]*x[j];***/ res[i]=B[i]-dprod(m, aim, x); //B[i]-sum; } rec_res=0; } else{/*** for(i=0; i<m; ++i) // approximate computation of the residual vector res[i]-=alpha*q[i];***/ daxpy(m, res, res, -alpha, q); rec_res=1; } if(prec){ switch(prec){ case SBA_CG_JACOBI: // jacobi for(i=0; i<m; ++i) s[i]=res[i]*wk[i]; break; case SBA_CG_SSOR: // SSOR for(i=0; i<m; ++i){ for(j=0, sum=0.0, aim=a+i*m; j<i; ++j) sum+=aim[j]*z[j]; z[i]=wk[i]*(res[i]-sum); } for(i=m-1; i>=0; --i){ for(j=i+1, sum=0.0, aim=a+i*m; j<m; ++j) sum+=aim[j]*s[j]; s[i]=z[i]-wk[i]*sum; } break; } } deltaold=deltanew;/*** for(i=0, sum=0.0; i<m; ++i) sum+=res[i]*s[i];***/ deltanew=dprod(m, res, s); //sum; /* make sure that we get around small delta that are due to * accumulated floating point roundoff errors */ if(rec_res && deltanew<=eps_sq*delta0){ /* analytically recompute delta */ for(i=0; i<m; ++i){ for(j=0, aim=a+i*m, sum=0.0; j<m; ++j) sum+=aim[j]*x[j]; res[i]=B[i]-sum; } deltanew=dprod(m, res, s); } beta=deltanew/deltaold;/*** for(i=0; i<m; ++i) d[i]=s[i]+beta*d[i];***/ daxpy(m, d, s, beta, d); } return iter;}/* * This function computes the Cholesky decomposition of the inverse of a symmetric * (covariance) matrix A into B, i.e. B is s.t. A^-1=B^t*B and B upper triangular. * A and B can coincide * * The function returns 0 in case of error (e.g. A is singular), * 1 if successfull * * This function is often called repetitively to operate on matrices of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. * */#if 0int sba_mat_cholinv(double *A, double *B, int m){static double *buf=NULL;static int buf_sz=0, nb=0;int a_sz, ipiv_sz, work_sz, tot_sz;register int i, j;int info, *ipiv;double *a, *work; if(A==NULL){ if(buf) free(buf); buf=NULL; buf_sz=0; return 1; } /* calculate the required memory size */ ipiv_sz=m; a_sz=m*m; if(!nb){#ifndef SBA_LS_SCARCE_MEMORY double tmp; work_sz=-1; // workspace query; optimal size is returned in tmp F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info); nb=((int)tmp)/m; // optimal worksize is m*nb#else nb=1; // min worksize is m#endif /* SBA_LS_SCARCE_MEMORY */ } work_sz=nb*m; tot_sz=ipiv_sz*sizeof(int) + (a_sz + work_sz)*sizeof(double); if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */ if(buf) free(buf); /* free previously allocated memory */ buf_sz=tot_sz; buf=(double *)malloc(buf_sz); if(!buf){ fprintf(stderr, "memory allocation in sba_mat_cholinv() failed!\n"); exit(1); } } ipiv=(int *)buf; a=(double *)(ipiv + ipiv_sz); work=a+a_sz; /* step 1: invert A */ /* store A into a; A is assumed symmetric, hence no transposition is needed */ for(i=0; i<m*m; ++i) a[i]=A[i]; /* LU decomposition for A (Cholesky should also do) */ F77_FUNC(dgetf2)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); //F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dgetf2/dgetrf illegal in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "singular matrix A for dgetf2/dgetrf in sba_mat_cholinv()\n"); return 0; } } /* (A)^{-1} from LU */ F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dgetri illegal in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "singular matrix A for dgetri in sba_mat_cholinv()\n"); return 0; } } /* (A)^{-1} is now in a (in column major!) */ /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); return 0; } } /* the decomposition is in the upper part of a (in column-major order!). * copying it to the lower part and zeroing the upper transposes * a in row-major order */ for(i=0; i<m; ++i) for(j=0; j<i; ++j){ a[i+j*m]=a[j+i*m]; a[j+i*m]=0.0; } for(i=0; i<m*m; ++i) B[i]=a[i]; return 1;}#endifint sba_mat_cholinv(double *A, double *B, int m){int a_sz;register int i, j;int info;double *a; if(A==NULL){ return 1; } a_sz=m*m; a=B; /* use B as working memory, result is produced in it */ /* step 1: invert A */ /* store A into a; A is assumed symmetric, hence no transposition is needed */ for(i=0; i<a_sz; ++i) a[i]=A[i]; /* Cholesky decomposition for A */ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dpotf2 illegal in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); return 0; } } /* (A)^{-1} from Cholesky */ F77_FUNC(dpotri)("U", (int *)&m, a, (int *)&m, (int *)&info); if(info!=0){ if(info<0){ fprintf(stderr, "argument %d of dpotri illegal in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in sba_mat_cholinv()\n", info, info); return 0; } } /* (A)^{-1} is now in a (in column major!) */ /* step 2: Cholesky decomposition of a: A^-1=B^t B, B upper triangular */ F77_FUNC(dpotf2)("U", (int *)&m, a, (int *)&m, (int *)&info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotf2 in sba_mat_cholinv()\n", -info); exit(1); } else{ fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\n%s\n", info, "and the Cholesky factorization could not be completed in sba_mat_cholinv()"); return 0; } } /* the decomposition is in the upper part of a (in column-major order!). * copying it to the lower part and zeroing the upper transposes * a in row-major order */ for(i=0; i<m; ++i) for(j=0; j<i; ++j){ a[i+j*m]=a[j+i*m]; a[j+i*m]=0.0; } return 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -