📄 misc_core.c
字号:
/////////////////////////////////////////////////////////////////////////////////// // Levenberg - Marquardt non-linear minimization algorithm// Copyright (C) 2004-05 Manolis Lourakis (lourakis at ics forth gr)// Institute of Computer Science, Foundation for Research & Technology - Hellas// Heraklion, Crete, Greece.//// This program is free software; you can redistribute it and/or modify// it under the terms of the GNU General Public License as published by// the Free Software Foundation; either version 2 of the License, or// (at your option) any later version.//// This program is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the// GNU General Public License for more details.///////////////////////////////////////////////////////////////////////////////////#ifndef LM_REAL // not included by misc.c#error This file should not be compiled directly!#endif/* precision-specific definitions */#define LEVMAR_CHKJAC LM_ADD_PREFIX(levmar_chkjac)#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)#define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx)#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)#define LEVMAR_STDDEV LM_ADD_PREFIX(levmar_stddev)#define LEVMAR_CORCOEF LM_ADD_PREFIX(levmar_corcoef)#define LEVMAR_R2 LM_ADD_PREFIX(levmar_R2)#define LEVMAR_BOX_CHECK LM_ADD_PREFIX(levmar_box_check)#define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy)#ifdef HAVE_LAPACK#define LEVMAR_PSEUDOINVERSE LM_ADD_PREFIX(levmar_pseudoinverse)static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m);/* BLAS matrix multiplication & LAPACK SVD routines */#ifdef LM_BLAS_PREFIX#define GEMM LM_CAT_(LM_BLAS_PREFIX, LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX)))#else#define GEMM LM_ADD_PREFIX(LM_CAT_(gemm, LM_BLAS_SUFFIX))#endif/* C := alpha*op( A )*op( B ) + beta*C */extern void GEMM(char *transa, char *transb, int *m, int *n, int *k, LM_REAL *alpha, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, LM_REAL *beta, LM_REAL *c, int *ldc);#define GESVD LM_MK_LAPACK_NAME(gesvd)#define GESDD LM_MK_LAPACK_NAME(gesdd)extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);/* lapack 3.0 new SVD routine, faster than xgesvd() */extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *iwork, int *info);/* Cholesky decomposition */#define POTF2 LM_MK_LAPACK_NAME(potf2)extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);#define LEVMAR_CHOLESKY LM_ADD_PREFIX(levmar_chol)#else#define LEVMAR_LUINVERSE LM_ADD_PREFIX(levmar_LUinverse_noLapack)static int LEVMAR_LUINVERSE(LM_REAL *A, LM_REAL *B, int m);#endif /* HAVE_LAPACK *//* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a) * using a block size of bsize. The product is returned in b. * Since a^T a is symmetric, its computation can be sped up by computing only its * upper triangular part and copying it to the lower part. * * More details on blocking can be found at * http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf */void LEVMAR_TRANS_MAT_MAT_MULT(LM_REAL *a, LM_REAL *b, int n, int m){#ifdef HAVE_LAPACK /* use BLAS matrix multiply */LM_REAL alpha=LM_CNST(1.0), beta=LM_CNST(0.0); /* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major, * therefore BLAS computes a*a^T with a and a*a^T in column major, which is equivalent to * computing a^T*a in row major! */ GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m);#else /* no LAPACK, use blocking-based multiply */register int i, j, k, jj, kk;register LM_REAL sum, *bim, *akm;const int bsize=__BLOCKSZ__;#define __MIN__(x, y) (((x)<=(y))? (x) : (y))#define __MAX__(x, y) (((x)>=(y))? (x) : (y)) /* compute upper triangular part using blocking */ for(jj=0; jj<m; jj+=bsize){ for(i=0; i<m; ++i){ bim=b+i*m; for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j) bim[j]=0.0; //b[i*m+j]=0.0; } for(kk=0; kk<n; kk+=bsize){ for(i=0; i<m; ++i){ bim=b+i*m; for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){ sum=0.0; for(k=kk; k<__MIN__(kk+bsize, n); ++k){ akm=a+k*m; sum+=akm[i]*akm[j]; //a[k*m+i]*a[k*m+j]; } bim[j]+=sum; //b[i*m+j]+=sum; } } } } /* copy upper triangular part to the lower one */ for(i=0; i<m; ++i) for(j=0; j<i; ++j) b[i*m+j]=b[j*m+i];#undef __MIN__#undef __MAX__#endif /* HAVE_LAPACK */}/* forward finite difference approximation to the Jacobian of func */void LEVMAR_FDIF_FORW_JAC_APPROX( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* function to differentiate */ LM_REAL *p, /* I: current parameter estimate, mx1 */ LM_REAL *hx, /* I: func evaluated at p, i.e. hx=func(p), nx1 */ LM_REAL *hxx, /* W/O: work array for evaluating func(p+delta), nx1 */ LM_REAL delta, /* increment for computing the Jacobian */ LM_REAL *jac, /* O: array for storing approximated Jacobian, nxm */ int m, int n, void *adata){register int i, j;LM_REAL tmp;register LM_REAL d; for(j=0; j<m; ++j){ /* determine d=max(1E-04*|p[j]|, delta), see HZ */ d=LM_CNST(1E-04)*p[j]; // force evaluation d=FABS(d); if(d<delta) d=delta; tmp=p[j]; p[j]+=d; (*func)(p, hxx, m, n, adata); p[j]=tmp; /* restore */ d=LM_CNST(1.0)/d; /* invert so that divisions can be carried out faster as multiplications */ for(i=0; i<n; ++i){ jac[i*m+j]=(hxx[i]-hx[i])*d; } }}/* central finite difference approximation to the Jacobian of func */void LEVMAR_FDIF_CENT_JAC_APPROX( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* function to differentiate */ LM_REAL *p, /* I: current parameter estimate, mx1 */ LM_REAL *hxm, /* W/O: work array for evaluating func(p-delta), nx1 */ LM_REAL *hxp, /* W/O: work array for evaluating func(p+delta), nx1 */ LM_REAL delta, /* increment for computing the Jacobian */ LM_REAL *jac, /* O: array for storing approximated Jacobian, nxm */ int m, int n, void *adata){register int i, j;LM_REAL tmp;register LM_REAL d; for(j=0; j<m; ++j){ /* determine d=max(1E-04*|p[j]|, delta), see HZ */ d=LM_CNST(1E-04)*p[j]; // force evaluation d=FABS(d); if(d<delta) d=delta; tmp=p[j]; p[j]-=d; (*func)(p, hxm, m, n, adata); p[j]=tmp+d; (*func)(p, hxp, m, n, adata); p[j]=tmp; /* restore */ d=LM_CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */ for(i=0; i<n; ++i){ jac[i*m+j]=(hxp[i]-hxm[i])*d; } }}/* * Check the Jacobian of a n-valued nonlinear function in m variables * evaluated at a point p, for consistency with the function itself. * * Based on fortran77 subroutine CHKDER by * Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More * Argonne National Laboratory. MINPACK project. March 1980. * * * func points to a function from R^m --> R^n: Given a p in R^m it yields hx in R^n * jacf points to a function implementing the Jacobian of func, whose correctness * is to be tested. Given a p in R^m, jacf computes into the nxm matrix j the * Jacobian of func at p. Note that row i of j corresponds to the gradient of * the i-th component of func, evaluated at p. * p is an input array of length m containing the point of evaluation. * m is the number of variables * n is the number of functions * adata points to possible additional data and is passed uninterpreted * to func, jacf. * err is an array of length n. On output, err contains measures * of correctness of the respective gradients. if there is * no severe loss of significance, then if err[i] is 1.0 the * i-th gradient is correct, while if err[i] is 0.0 the i-th * gradient is incorrect. For values of err between 0.0 and 1.0, * the categorization is less certain. In general, a value of * err[i] greater than 0.5 indicates that the i-th gradient is * probably correct, while a value of err[i] less than 0.5 * indicates that the i-th gradient is probably incorrect. * * * The function does not perform reliably if cancellation or * rounding errors cause a severe loss of significance in the * evaluation of a function. therefore, none of the components * of p should be unusually small (in particular, zero) or any * other value which may cause loss of significance. */void LEVMAR_CHKJAC( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), LM_REAL *p, int m, int n, void *adata, LM_REAL *err){LM_REAL factor=LM_CNST(100.0);LM_REAL one=LM_CNST(1.0);LM_REAL zero=LM_CNST(0.0);LM_REAL *fvec, *fjac, *pp, *fvecp, *buf;register int i, j;LM_REAL eps, epsf, temp, epsmch;LM_REAL epslog;int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n; epsmch=LM_REAL_EPSILON; eps=(LM_REAL)sqrt(epsmch); buf=(LM_REAL *)malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)*sizeof(LM_REAL)); if(!buf){ fprintf(stderr, LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n")); exit(1); } fvec=buf; fjac=fvec+fvec_sz; pp=fjac+fjac_sz; fvecp=pp+pp_sz; /* compute fvec=func(p) */ (*func)(p, fvec, m, n, adata); /* compute the Jacobian at p */ (*jacf)(p, fjac, m, n, adata); /* compute pp */ for(j=0; j<m; ++j){ temp=eps*FABS(p[j]); if(temp==zero) temp=eps; pp[j]=p[j]+temp; } /* compute fvecp=func(pp) */ (*func)(pp, fvecp, m, n, adata); epsf=factor*epsmch; epslog=(LM_REAL)log10(eps); for(i=0; i<n; ++i) err[i]=zero; for(j=0; j<m; ++j){ temp=FABS(p[j]); if(temp==zero) temp=one; for(i=0; i<n; ++i) err[i]+=temp*fjac[i*m+j]; } for(i=0; i<n; ++i){ temp=one; if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i])) temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i])); err[i]=one; if(temp>epsmch && temp<eps) err[i]=((LM_REAL)log10(temp) - epslog)/epslog; if(temp>=eps) err[i]=zero; } free(buf); return;}#ifdef HAVE_LAPACK/* * This function computes the pseudoinverse of a square matrix A * into B using SVD. A and B can coincide * * The function returns 0 in case of error (e.g. A is singular), * the rank of A if successful * * A, B are mxm * */static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m){LM_REAL *buf=NULL;int buf_sz=0;static LM_REAL eps=LM_CNST(-1.0);register int i, j;LM_REAL *a, *u, *s, *vt, *work;int a_sz, u_sz, s_sz, vt_sz, tot_sz;LM_REAL thresh, one_over_denom;int info, rank, worksz, *iwork, iworksz; /* calculate required memory size */ worksz=5*m; // min worksize for GESVD //worksz=m*(7*m+4); // min worksize for GESDD iworksz=8*m; a_sz=m*m; u_sz=m*m; s_sz=m; vt_sz=m*m; tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */ buf_sz=tot_sz; buf=(LM_REAL *)malloc(buf_sz); if(!buf){ fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n"); return 0; /* error */ } a=buf; u=a+a_sz; s=u+u_sz; vt=s+s_sz; work=vt+vt_sz; iwork=(int *)(work+worksz); /* store A (column major!) into a */ for(i=0; i<m; i++) for(j=0; j<m; j++) a[i+j*m]=A[i*m+j]; /* SVD decomposition of A */ GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info); //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info); /* error treatment */ if(info!=0){ if(info<0){ fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info); } else{ fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info); } free(buf); return 0; } if(eps<0.0){ LM_REAL aux; /* compute machine epsilon */ for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5)) ; eps*=LM_CNST(2.0); } /* compute the pseudoinverse in B */ for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */ for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){ one_over_denom=LM_CNST(1.0)/s[rank]; for(j=0; j<m; j++) for(i=0; i<m; i++) B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom; } free(buf);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -