📄 lmlec_core.c

📁 A sparse variant of the Levenberg-Marquardt algorithm implemented by levmar has been applied to bund
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/////////////////////////////////////////////////////////////////////////////////// //  Levenberg - Marquardt non-linear minimization algorithm//  Copyright (C) 2004-05  Manolis Lourakis (lourakis at ics forth gr)//  Institute of Computer Science, Foundation for Research & Technology - Hellas//  Heraklion, Crete, Greece.////  This program is free software; you can redistribute it and/or modify//  it under the terms of the GNU General Public License as published by//  the Free Software Foundation; either version 2 of the License, or//  (at your option) any later version.////  This program is distributed in the hope that it will be useful,//  but WITHOUT ANY WARRANTY; without even the implied warranty of//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the//  GNU General Public License for more details.///////////////////////////////////////////////////////////////////////////////////#ifndef LM_REAL // not included by lmlec.c#error This file should not be compiled directly!#endif/* precision-specific definitions */#define LMLEC_DATA LM_ADD_PREFIX(lmlec_data)#define LMLEC_ELIM LM_ADD_PREFIX(lmlec_elim)#define LMLEC_FUNC LM_ADD_PREFIX(lmlec_func)#define LMLEC_JACF LM_ADD_PREFIX(lmlec_jacf)#define LEVMAR_LEC_DER LM_ADD_PREFIX(levmar_lec_der)#define LEVMAR_LEC_DIF LM_ADD_PREFIX(levmar_lec_dif)#define LEVMAR_DER LM_ADD_PREFIX(levmar_der)#define LEVMAR_DIF LM_ADD_PREFIX(levmar_dif)#define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult)#define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar)#define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx)#define GEQP3 LM_MK_LAPACK_NAME(geqp3)#define ORGQR LM_MK_LAPACK_NAME(orgqr)#define TRTRI LM_MK_LAPACK_NAME(trtri)struct LMLEC_DATA{  LM_REAL *c, *Z, *p, *jac;  int ncnstr;  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata);  void (*jacf)(LM_REAL *p, LM_REAL *jac, int m, int n, void *adata);  void *adata;};/* prototypes for LAPACK routines */extern int GEQP3(int *m, int *n, LM_REAL *a, int *lda, int *jpvt,                   LM_REAL *tau, LM_REAL *work, int *lwork, int *info);extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau,                   LM_REAL *work, int *lwork, int *info);extern int TRTRI(char *uplo, char *diag, int *n, LM_REAL *a, int *lda, int *info);/* * This function implements an elimination strategy for linearly constrained * optimization problems. The strategy relies on QR decomposition to transform * an optimization problem constrained by Ax=b to an equivalent, unconstrained * one. Also referred to as "null space" or "reduced Hessian" method. * See pp. 430-433 (chap. 15) of "Numerical Optimization" by Nocedal-Wright * for details. * * A is mxn with m<=n and rank(A)=m * Two matrices Y and Z of dimensions nxm and nx(n-m) are computed from A^T so that * their columns are orthonormal and every x can be written as x=Y*b + Z*x_z= * c + Z*x_z, where c=Y*b is a fixed vector of dimension n and x_z is an * arbitrary vector of dimension n-m. Then, the problem of minimizing f(x) * subject to Ax=b is equivalent to minimizing f(c + Z*x_z) with no constraints. * The computed Y and Z are such that any solution of Ax=b can be written as * x=Y*x_y + Z*x_z for some x_y, x_z. Furthermore, A*Y is nonsingular, A*Z=0 * and Z spans the null space of A. * * The function accepts A, b and computes c, Y, Z. If b or c is NULL, c is not * computed. Also, Y can be NULL in which case it is not referenced. * The function returns LM_ERROR in case of error, A's computed rank if successful * */static int LMLEC_ELIM(LM_REAL *A, LM_REAL *b, LM_REAL *c, LM_REAL *Y, LM_REAL *Z, int m, int n){static LM_REAL eps=LM_CNST(-1.0);LM_REAL *buf=NULL;LM_REAL *a, *tau, *work, *r, aux;register LM_REAL tmp;int a_sz, jpvt_sz, tau_sz, r_sz, Y_sz, worksz;int info, rank, *jpvt, tot_sz, mintmn, tm, tn;register int i, j, k;  if(m>n){    fprintf(stderr, RCAT("matrix of constraints cannot have more rows than columns in", LMLEC_ELIM) "()!\n");    return LM_ERROR;  }  tm=n; tn=m; // transpose dimensions  mintmn=m;  /* calculate required memory size */  worksz=-1; // workspace query. Optimal work size is returned in aux  //ORGQR((int *)&tm, (int *)&tm, (int *)&mintmn, NULL, (int *)&tm, NULL, (LM_REAL *)&aux, &worksz, &info);  GEQP3((int *)&tm, (int *)&tn, NULL, (int *)&tm, NULL, NULL, (LM_REAL *)&aux, (int *)&worksz, &info);  worksz=(int)aux;  a_sz=tm*tm; // tm*tn is enough for xgeqp3()  jpvt_sz=tn;  tau_sz=mintmn;  r_sz=mintmn*mintmn; // actually smaller if a is not of full row rank  Y_sz=(Y)? 0 : tm*tn;  tot_sz=(a_sz + tau_sz + r_sz + worksz + Y_sz)*sizeof(LM_REAL) + jpvt_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */  buf=(LM_REAL *)malloc(tot_sz); /* allocate a "big" memory chunk at once */  if(!buf){    fprintf(stderr, RCAT("Memory allocation request failed in ", LMLEC_ELIM) "()\n");    return LM_ERROR;  }  a=buf;  tau=a+a_sz;  r=tau+tau_sz;  work=r+r_sz;  if(!Y){    Y=work+worksz;    jpvt=(int *)(Y+Y_sz);  }  else    jpvt=(int *)(work+worksz);  /* copy input array so that LAPACK won't destroy it. Note that copying is   * done in row-major order, which equals A^T in column-major   */  for(i=0; i<tm*tn; ++i)      a[i]=A[i];  /* clear jpvt */  for(i=0; i<jpvt_sz; ++i) jpvt[i]=0;  /* rank revealing QR decomposition of A^T*/  GEQP3((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, (int *)&worksz, &info);  //dgeqpf_((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, &info);  /* error checking */  if(info!=0){    if(info<0){      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQP3) " in ", LMLEC_ELIM) "()\n", -info);    }    else if(info>0){      fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", GEQP3) " in ", LMLEC_ELIM) "()\n", info);    }    free(buf);    return LM_ERROR;  }  /* the upper triangular part of a now contains the upper triangle of the unpermuted R */  if(eps<0.0){    LM_REAL aux;    /* compute machine epsilon. DBL_EPSILON should do also */    for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))                              ;    eps*=LM_CNST(2.0);  }  tmp=tm*LM_CNST(10.0)*eps*FABS(a[0]); // threshold. tm is max(tm, tn)  tmp=(tmp>LM_CNST(1E-12))? tmp : LM_CNST(1E-12); // ensure that threshold is not too small  /* compute A^T's numerical rank by counting the non-zeros in R's diagonal */  for(i=rank=0; i<mintmn; ++i)    if(a[i*(tm+1)]>tmp || a[i*(tm+1)]<-tmp) ++rank; /* loop across R's diagonal elements */    else break; /* diagonal is arranged in absolute decreasing order */  if(rank<tn){    fprintf(stderr, RCAT("\nConstraints matrix in ",  LMLEC_ELIM) "() is not of full row rank (i.e. %d < %d)!\n"            "Make sure that you do not specify redundant or inconsistent constraints.\n\n", rank, tn);    free(buf);    return LM_ERROR;  }  /* compute the permuted inverse transpose of R */  /* first, copy R from the upper triangular part of a to r. R is rank x rank */  for(j=0; j<rank; ++j){    for(i=0; i<=j; ++i)      r[i+j*rank]=a[i+j*tm];    for(i=j+1; i<rank; ++i)      r[i+j*rank]=0.0; // lower part is zero  }  /* compute the inverse */  TRTRI("U", "N", (int *)&rank, r, (int *)&rank, &info);  /* error checking */  if(info!=0){    if(info<0){      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRI) " in ", LMLEC_ELIM) "()\n", -info);    }    else if(info>0){      fprintf(stderr, RCAT(RCAT("A(%d, %d) is exactly zero for ", TRTRI) " (singular matrix) in ", LMLEC_ELIM) "()\n", info, info);    }    free(buf);    return LM_ERROR;  }  /* then, transpose r in place */  for(i=0; i<rank; ++i)    for(j=i+1; j<rank; ++j){      tmp=r[i+j*rank];      k=j+i*rank;      r[i+j*rank]=r[k];      r[k]=tmp;  }  /* finally, permute R^-T using Y as intermediate storage */  for(j=0; j<rank; ++j)    for(i=0, k=jpvt[j]-1; i<rank; ++i)      Y[i+k*rank]=r[i+j*rank];  for(i=0; i<rank*rank; ++i) // copy back to r    r[i]=Y[i];  /* resize a to be tm x tm, filling with zeroes */  for(i=tm*tn; i<tm*tm; ++i)    a[i]=0.0;  /* compute Q in a as the product of elementary reflectors. Q is tm x tm */  ORGQR((int *)&tm, (int *)&tm, (int *)&mintmn, a, (int *)&tm, tau, work, &worksz, &info);  /* error checking */  if(info!=0){    if(info<0){      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", LMLEC_ELIM) "()\n", -info);    }    else if(info>0){      fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", ORGQR) " in ", LMLEC_ELIM) "()\n", info);    }    free(buf);    return LM_ERROR;  }  /* compute Y=Q_1*R^-T*P^T. Y is tm x rank */  for(i=0; i<tm; ++i)    for(j=0; j<rank; ++j){      for(k=0, tmp=0.0; k<rank; ++k)        tmp+=a[i+k*tm]*r[k+j*rank];      Y[i*rank+j]=tmp;    }  if(b && c){    /* compute c=Y*b */    for(i=0; i<tm; ++i){      for(j=0, tmp=0.0; j<rank; ++j)        tmp+=Y[i*rank+j]*b[j];      c[i]=tmp;    }  }  /* copy Q_2 into Z. Z is tm x (tm-rank) */  for(j=0; j<tm-rank; ++j)    for(i=0, k=j+rank; i<tm; ++i)      Z[i*(tm-rank)+j]=a[i+k*tm];  free(buf);  return rank;}/* constrained measurements: given pp, compute the measurements at c + Z*pp */static void LMLEC_FUNC(LM_REAL *pp, LM_REAL *hx, int mm, int n, void *adata){struct LMLEC_DATA *data=(struct LMLEC_DATA *)adata;int m;register int i, j;register LM_REAL sum;LM_REAL *c, *Z, *p, *Zimm;  m=mm+data->ncnstr;  c=data->c;  Z=data->Z;  p=data->p;  /* p=c + Z*pp */  for(i=0; i<m; ++i){    Zimm=Z+i*mm;    for(j=0, sum=c[i]; j<mm; ++j)      sum+=Zimm[j]*pp[j]; // sum+=Z[i*mm+j]*pp[j];    p[i]=sum;  }  (*(data->func))(p, hx, m, n, data->adata);}/* constrained Jacobian: given pp, compute the Jacobian at c + Z*pp * Using the chain rule, the Jacobian with respect to pp equals the * product of the Jacobian with respect to p (at c + Z*pp) times Z */static void LMLEC_JACF(LM_REAL *pp, LM_REAL *jacjac, int mm, int n, void *adata){struct LMLEC_DATA *data=(struct LMLEC_DATA *)adata;int m;register int i, j, l;register LM_REAL sum, *aux1, *aux2;LM_REAL *c, *Z, *p, *jac;   m=mm+data->ncnstr;  c=data->c;  Z=data->Z;  p=data->p;  jac=data->jac;  /* p=c + Z*pp */  for(i=0; i<m; ++i){    aux1=Z+i*mm;    for(j=0, sum=c[i]; j<mm; ++j)      sum+=aux1[j]*pp[j]; // sum+=Z[i*mm+j]*pp[j];    p[i]=sum;  }  (*(data->jacf))(p, jac, m, n, data->adata);  /* compute jac*Z in jacjac */  if(n*m<=__BLOCKSZ__SQ){ // this is a small problem    /* This is the straightforward way to compute jac*Z. However, due to     * its noncontinuous memory access pattern, it incures many cache misses when     * applied to large minimization problems (i.e. problems involving a large     * number of free variables and measurements), in which jac is too large to     * fit in the L1 cache. For such problems, a cache-efficient blocking scheme     * is preferable. On the other hand, the straightforward algorithm is faster     * on small problems since in this case it avoids the overheads of blocking.     */    for(i=0; i<n; ++i){      aux1=jac+i*m;      aux2=jacjac+i*mm;      for(j=0; j<mm; ++j){        for(l=0, sum=0.0; l<m; ++l)
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -