📄 sba_lapack.c

📁 sparse bundle ajustment的源码
💻 C
📖 第 1 页 / 共 4 页
字号:
}/* * This function computes the inverse of a square matrix whose upper triangle * is stored in A into its lower triangle using LU decomposition * * The function returns 0 in case of error (e.g. A is singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_LU(double *A, int m){static double *buf=NULL;static int buf_sz=0, nb=0;int a_sz, ipiv_sz, work_sz, tot_sz;register int i, j;int info, *ipiv;double *a, *work;     if(A==NULL){    if(buf) free(buf);    buf=NULL;    buf_sz=0;    return 1;  }  /* calculate required memory size */  ipiv_sz=m;  a_sz=m*m;  if(!nb){#ifndef SBA_LS_SCARCE_MEMORY    double tmp;    work_sz=-1; // workspace query; optimal size is returned in tmp    F77_FUNC(dgetri)((int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);    nb=((int)tmp)/m; // optimal worksize is m*nb#else    nb=1; // min worksize is m#endif /* SBA_LS_SCARCE_MEMORY */  }  work_sz=nb*m;  tot_sz=ipiv_sz*sizeof(int) + (a_sz + work_sz)*sizeof(double);   if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */    if(buf) free(buf); /* free previously allocated memory */    buf_sz=tot_sz;    buf=(double *)malloc(buf_sz);    if(!buf){      fprintf(stderr, "memory allocation in sba_symat_invert_LU() failed!\n");      exit(1);    }  }  ipiv=(int *)buf;  a=(double *)(ipiv + ipiv_sz);  work=a+a_sz;  /* store A (column major!) into a */	for(i=0; i<m; ++i)		for(j=i; j<m; ++j)			a[i+j*m]=a[j+i*m]=A[i*m+j]; // copy A's upper part to a's upper & lower  /* LU decomposition for A */	F77_FUNC(dgetrf)((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info);  	if(info!=0){		if(info<0){			fprintf(stderr, "argument %d of dgetrf illegal in sba_symat_invert_LU()\n", -info);			exit(1);		}		else{			fprintf(stderr, "singular matrix A for dgetrf in sba_symat_invert_LU()\n");			return 0;		}	}  /* (A)^{-1} from LU */	F77_FUNC(dgetri)((int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);	if(info!=0){		if(info<0){			fprintf(stderr, "argument %d of dgetri illegal in sba_symat_invert_LU()\n", -info);			exit(1);		}		else{			fprintf(stderr, "singular matrix A for dgetri in sba_symat_invert_LU()\n");			return 0;		}	}	/* store (A)^{-1} in A's lower triangle */	for(i=0; i<m; ++i)		for(j=0; j<=i; ++j)      A[i*m+j]=a[i+j*m];	return 1;}/* * This function computes the inverse of a square symmetric positive definite  * matrix whose upper triangle is stored in A into its lower triangle using * Cholesky factorization * * The function returns 0 in case of error (e.g. A is not positive definite or singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_Chol(double *A, int m){static double *buf=NULL;static int buf_sz=0;int a_sz, tot_sz;register int i, j;int info;double *a;     if(A==NULL){    if(buf) free(buf);    buf=NULL;    buf_sz=0;    return 1;  }  /* calculate required memory size */  a_sz=m*m;  tot_sz=a_sz;   if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */    if(buf) free(buf); /* free previously allocated memory */    buf_sz=tot_sz;    buf=(double *)malloc(buf_sz*sizeof(double));    if(!buf){      fprintf(stderr, "memory allocation in sba_symat_invert_Chol() failed!\n");      exit(1);    }  }  a=(double *)buf;  /* store A into a; A is assumed symmetric, hence no transposition is needed */  for(i=0, j=a_sz; i<j; ++i)    a[i]=A[i];  /* Cholesky factorization for A; a's lower part corresponds to A's upper */  //F77_FUNC(dpotrf)("L", (int *)&m, a, (int *)&m, (int *)&info);  F77_FUNC(dpotf2)("L", (int *)&m, a, (int *)&m, (int *)&info);  /* error treatment */  if(info!=0){    if(info<0){      fprintf(stderr, "LAPACK error: illegal value for argument %d of dpotrf in sba_symat_invert_Chol()\n", -info);      exit(1);    }    else{      fprintf(stderr, "LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for dpotrf in sba_symat_invert_Chol()\n", info);      return 0;    }  }  /* (A)^{-1} from Cholesky */  F77_FUNC(dpotri)("L", (int *)&m, a, (int *)&m, (int *)&info);	if(info!=0){		if(info<0){			fprintf(stderr, "argument %d of dpotri illegal in sba_symat_invert_Chol()\n", -info);			exit(1);		}		else{			fprintf(stderr, "the (%d, %d) element of the factor U or L is zero, singular matrix A for dpotri in sba_symat_invert_Chol()\n", info, info);			return 0;		}	}	/* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */	for(i=0; i<m; ++i)		for(j=0; j<=i; ++j)      A[i*m+j]=a[i+j*m];	return 1;}/* * This function computes the inverse of a symmetric indefinite  * matrix whose upper triangle is stored in A into its lower triangle * using LDLT factorization with the pivoting strategy of Bunch and Kaufman * * The function returns 0 in case of error (e.g. A is singular), * 1 if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_symat_invert_BK(double *A, int m){static double *buf=NULL;static int buf_sz=0, nb=0;int a_sz, ipiv_sz, work_sz, tot_sz;register int i, j;int info, *ipiv;double *a, *work;     if(A==NULL){    if(buf) free(buf);    buf=NULL;    buf_sz=0;    return 1;  }  /* calculate required memory size */  ipiv_sz=m;  a_sz=m*m;  if(!nb){#ifndef SBA_LS_SCARCE_MEMORY    double tmp;    work_sz=-1; // workspace query; optimal size is returned in tmp    F77_FUNC(dsytrf)("L", (int *)&m, NULL, (int *)&m, NULL, (double *)&tmp, (int *)&work_sz, (int *)&info);    nb=((int)tmp)/m; // optimal worksize is m*nb#else    nb=-1; // min worksize is 1#endif /* SBA_LS_SCARCE_MEMORY */  }  work_sz=(nb!=-1)? nb*m : 1;  work_sz=(work_sz>=m)? work_sz : m; /* ensure that work is at least m elements long, as required by dsytri */  tot_sz=ipiv_sz*sizeof(int) + (a_sz + work_sz)*sizeof(double);  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */    if(buf) free(buf); /* free previously allocated memory */    buf_sz=tot_sz;    buf=(double *)malloc(buf_sz);    if(!buf){      fprintf(stderr, "memory allocation in sba_symat_invert_BK() failed!\n");      exit(1);    }  }  ipiv=(int *)buf;  a=(double *)(ipiv + ipiv_sz);  work=a+a_sz;  /* store A into a; A is assumed symmetric, hence no transposition is needed */  for(i=0, j=a_sz; i<j; ++i)    a[i]=A[i];  /* LDLT factorization for A; a's lower part corresponds to A's upper */	F77_FUNC(dsytrf)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);	if(info!=0){		if(info<0){			fprintf(stderr, "argument %d of dsytrf illegal in sba_symat_invert_BK()\n", -info);			exit(1);		}		else{			fprintf(stderr, "singular block diagonal matrix D for dsytrf in sba_symat_invert_BK() [D(%d, %d) is zero]\n", info, info);			return 0;		}	}  /* (A)^{-1} from LDLT */  F77_FUNC(dsytri)("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&info);	if(info!=0){		if(info<0){			fprintf(stderr, "argument %d of dsytri illegal in sba_symat_invert_BK()\n", -info);			exit(1);		}		else{			fprintf(stderr, "D(%d, %d)=0, matrix is singular and its inverse could not be computed in sba_symat_invert_BK()\n", info, info);			return 0;		}	}	/* store (A)^{-1} in A's lower triangle. The lower triangle of the symmetric A^{-1} is in the lower triangle of a */	for(i=0; i<m; ++i)		for(j=0; j<=i; ++j)      A[i*m+j]=a[i+j*m];	return 1;}#define __CG_LINALG_BLOCKSIZE           8/* Dot product of two vectors x and y using loop unrolling and blocking. * see http://www.abarnett.demon.co.uk/tutorial.html */inline static double dprod(const int n, const double *const x, const double *const y){register int i, j1, j2, j3, j4, j5, j6, j7; int blockn;register double sum0=0.0, sum1=0.0, sum2=0.0, sum3=0.0,                sum4=0.0, sum5=0.0, sum6=0.0, sum7=0.0;  /* n may not be divisible by __CG_LINALG_BLOCKSIZE,   * go as near as we can first, then tidy up.  */   blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;   /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */   for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){            sum0+=x[i]*y[i];    j1=i+1; sum1+=x[j1]*y[j1];    j2=i+2; sum2+=x[j2]*y[j2];    j3=i+3; sum3+=x[j3]*y[j3];    j4=i+4; sum4+=x[j4]*y[j4];    j5=i+5; sum5+=x[j5]*y[j5];    j6=i+6; sum6+=x[j6]*y[j6];    j7=i+7; sum7+=x[j7]*y[j7];  }  /*   * There may be some left to do.  * This could be done as a simple for() loop,   * but a switch is faster (and more interesting)   */   if(i<n){     /* Jump into the case at the place that will allow    * us to finish off the appropriate number of items.     */     switch(n - i){       case 7 : sum0+=x[i]*y[i]; ++i;      case 6 : sum1+=x[i]*y[i]; ++i;      case 5 : sum2+=x[i]*y[i]; ++i;      case 4 : sum3+=x[i]*y[i]; ++i;      case 3 : sum4+=x[i]*y[i]; ++i;      case 2 : sum5+=x[i]*y[i]; ++i;      case 1 : sum6+=x[i]*y[i]; ++i;    }  }   return sum0+sum1+sum2+sum3+sum4+sum5+sum6+sum7;}/* Compute z=x+a*y for two vectors x and y and a scalar a; z can be one of x, y. * Similarly to the dot product routine, this one uses loop unrolling and blocking */inline static void daxpy(const int n, double *const z, const double *const x, const double a, const double *const y){ register int i, j1, j2, j3, j4, j5, j6, j7; int blockn;  /* n may not be divisible by __CG_LINALG_BLOCKSIZE,   * go as near as we can first, then tidy up.  */   blockn = (n / __CG_LINALG_BLOCKSIZE) * __CG_LINALG_BLOCKSIZE;   /* unroll the loop in blocks of `__CG_LINALG_BLOCKSIZE' */   for(i=0; i<blockn; i+=__CG_LINALG_BLOCKSIZE){            z[i]=x[i]+a*y[i];    j1=i+1; z[j1]=x[j1]+a*y[j1];    j2=i+2; z[j2]=x[j2]+a*y[j2];    j3=i+3; z[j3]=x[j3]+a*y[j3];    j4=i+4; z[j4]=x[j4]+a*y[j4];    j5=i+5; z[j5]=x[j5]+a*y[j5];    j6=i+6; z[j6]=x[j6]+a*y[j6];    j7=i+7; z[j7]=x[j7]+a*y[j7];  }  /*   * There may be some left to do.  * This could be done as a simple for() loop,   * but a switch is faster (and more interesting)   */   if(i<n){     /* Jump into the case at the place that will allow    * us to finish off the appropriate number of items.     */     switch(n - i){       case 7 : z[i]=x[i]+a*y[i]; ++i;      case 6 : z[i]=x[i]+a*y[i]; ++i;      case 5 : z[i]=x[i]+a*y[i]; ++i;      case 4 : z[i]=x[i]+a*y[i]; ++i;      case 3 : z[i]=x[i]+a*y[i]; ++i;      case 2 : z[i]=x[i]+a*y[i]; ++i;      case 1 : z[i]=x[i]+a*y[i]; ++i;    }  } }/* * This function returns the solution of Ax = b where A is posititive definite, * based on the conjugate gradients method; see "An intro to the CG method" by J.R. Shewchuk, p. 50-51 * * A is mxm, b, x are is mx1. Argument niter specifies the maximum number of  * iterations and eps is the desired solution accuracy. niter<0 signals that * x contains a valid initial approximation to the solution; if niter>0 then  * the starting point is taken to be zero. Argument prec selects the desired * preconditioning method as follows: * 0: no preconditioning * 1: jacobi (diagonal) preconditioning * 2: SSOR preconditioning * Argument iscolmaj specifies whether A is stored in column or row major order. * * The function returns 0 in case of error, * the number of iterations performed if successfull * * This function is often called repetitively to solve problems of identical * dimensions. To avoid repetitive malloc's and free's, allocated memory is * retained between calls and free'd-malloc'ed when not of the appropriate size. * A call with NULL as the first argument forces this memory to be released. */int sba_Axb_CG(double *A, double *B, double *x, int m, int niter, double eps, int prec, int iscolmaj){static double *buf=NULL;
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -