📄 sba_levmar.c
字号:
idxij.rowptr[i]=k; ii=i*m; for(j=0; j<m; ++j) if(vmask[ii+j]){ idxij.val[k]=k; idxij.colidx[k++]=j; } } idxij.rowptr[n]=nvis; /* find the maximum number (for all cameras) of visible image projections coming from a single 3D point */ for(i=maxCvis=0; i<n; ++i) if((k=idxij.rowptr[i+1]-idxij.rowptr[i])>maxCvis) maxCvis=k; /* find the maximum number (for all points) of visible image projections in any single camera */ for(j=maxPvis=0; j<m; ++j){ for(i=ii=0; i<n; ++i) if(vmask[i*m+j]) ++ii; if(ii>maxPvis) maxPvis=ii; } maxCPvis=(maxCvis>=maxPvis)? maxCvis : maxPvis;#if 0 /* determine the density of matrix S */ for(j=mcon, ii=0; j<m; ++j){ ++ii; /* block Sjj is surely nonzero */ for(k=j+1; k<m; ++k) if(sba_crsm_common_row(&idxij, j, k)) ii+=2; /* blocks Sjk & Skj are nonzero */ } printf("\nS density: %.5g\n", ((double)ii)/(mmcon*mmcon)); fflush(stdout);#endif /* allocate work arrays */ /* W is big enough to hold both jac & W. Note also the extra Wsz, see the initialization of jac below for explanation */ W=(double *)emalloc((nvis*((Wsz>=ABsz)? Wsz : ABsz) + Wsz)*sizeof(double)); U=(double *)emalloc(m*Usz*sizeof(double)); V=(double *)emalloc(n*Vsz*sizeof(double)); e=(double *)emalloc(nobs*sizeof(double)); eab=(double *)emalloc(nvars*sizeof(double)); E=(double *)emalloc(m*cnp*sizeof(double)); Yj=(double *)emalloc(maxPvis*Ysz*sizeof(double)); YWt=(double *)emalloc(YWtsz*sizeof(double)); S=(double *)emalloc(m*m*Sblsz*sizeof(double)); dp=(double *)emalloc(nvars*sizeof(double)); Wtda=(double *)emalloc(pnp*sizeof(double)); rcidxs=(int *)emalloc(maxCPvis*sizeof(int)); rcsubs=(int *)emalloc(maxCPvis*sizeof(int));#ifndef SBA_DESTROY_COVS if(covx!=NULL) wght=(double *)emalloc(nvis*covsz*sizeof(double));#else if(covx!=NULL) wght=covx;#endif /* SBA_DESTROY_COVS */ hx=(double *)emalloc(nobs*sizeof(double)); diagUV=(double *)emalloc(nvars*sizeof(double)); pdp=(double *)emalloc(nvars*sizeof(double)); /* to save resources, W and jac share the same memory: First, the jacobian * is computed in some working memory that is then overwritten during the * computation of W. To account for the case of W being larger than jac, * extra memory is reserved "before" jac. * Care must be taken, however, to ensure that storing a certain W_ij * does not overwrite the A_ij, B_ij used to compute it. To achieve * this is, note that if p1 and p2 respectively point to the first elements * of a certain W_ij and A_ij, B_ij pair, we should have p2-p1>=Wsz. * There are two cases: * a) Wsz>=ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+nvis*(Wsz-ABsz)+k*ABsz * for some k (0<=k<nvis), thus p2-p1=(nvis-k)*(Wsz-ABsz)+Wsz. * The right side of the last equation is obviously > Wsz for all 0<=k<nvis * * b) Wsz<ABsz: Then p1=W+k*Wsz and p2=jac+k*ABsz=W+Wsz+k*ABsz and * p2-p1=Wsz+k*(ABsz-Wsz), which is again > Wsz for all 0<=k<nvis * * Concluding, if jac is initialized as below, the memory allocated to all * W_ij is guaranteed not to overlap with that allocated to their corresponding * A_ij, B_ij pairs */ jac=W + Wsz + ((Wsz>ABsz)? nvis*(Wsz-ABsz) : 0); /* set up auxiliary pointers */ pa=p; pb=p+m*cnp; ea=eab; eb=eab+m*cnp; dpa=dp; dpb=dp+m*cnp; diagU=diagUV; diagV=diagUV + m*cnp; /* if no jacobian function is supplied, prepare to compute jacobian with finite difference */ if(!fjac){ fdj_data.func=func; fdj_data.cnp=cnp; fdj_data.pnp=pnp; fdj_data.mnp=mnp; fdj_data.hx=hx; fdj_data.hxx=(double *)emalloc(nobs*sizeof(double)); fdj_data.func_rcidxs=(int *)emalloc(2*maxCPvis*sizeof(int)); fdj_data.func_rcsubs=fdj_data.func_rcidxs+maxCPvis; fdj_data.adata=adata; fjac=sba_fdjac_x; jac_adata=(void *)&fdj_data; } else{ fdj_data.hxx=NULL; jac_adata=adata; } if(itmax==0){ /* verify jacobian */ sba_motstr_chkjac_x(func, fjac, p, &idxij, rcidxs, rcsubs, mcon, cnp, pnp, mnp, adata, jac_adata); retval=0; goto freemem_and_return; } /* covariances Sigma_x_ij are accommodated by computing the Cholesky decompositions of their * inverses and using the resulting matrices w_x_ij to weigh A_ij, B_ij, and e_ij as w_x_ij A_ij, * w_x_ij*B_ij and w_x_ij*e_ij. In this way, auxiliary variables as U_j=\sum_i A_ij^T A_ij * actually become \sum_i (w_x_ij A_ij)^T w_x_ij A_ij= \sum_i A_ij^T w_x_ij^T w_x_ij A_ij = * A_ij^T Sigma_x_ij^-1 A_ij * * ea_j, V_i, eb_i, W_ij are weighted in a similar manner */ if(covx!=NULL){ for(i=0; i<n; ++i){ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero x_ij, j=0...m-1 */ for(j=0; j<nnz; ++j){ /* set ptr1, ptr2 to point to cov_x_ij, w_x_ij resp. */ ptr1=covx + (k=idxij.val[rcidxs[j]]*covsz); ptr2=wght + k; if(!sba_mat_cholinv(ptr1, ptr2, mnp)){ /* compute w_x_ij s.t. w_x_ij^T w_x_ij = cov_x_ij^-1 */ fprintf(stderr, "SBA: invalid covariance matrix for x_ij (i=%d, j=%d) in sba_motstr_levmar_x()\n", i, rcsubs[j]); retval=SBA_ERROR; goto freemem_and_return; } } } sba_mat_cholinv(NULL, NULL, 0); /* cleanup */ } /* compute the error vectors e_ij in hx */ (*func)(p, &idxij, rcidxs, rcsubs, hx, adata); nfev=1; /* ### compute e=x - f(p) [e=w*(x - f(p)] and its L2 norm */ if(covx==NULL) p_eL2=nrmL2xmy(e, x, hx, nobs); /* e=x-hx, p_eL2=||e|| */ else p_eL2=nrmCxmy(e, x, hx, wght, mnp, nvis); /* e=wght*(x-hx), p_eL2=||e||=||x-hx||_Sigma^-1 */ if(verbose) printf("initial motstr-SBA error %g [%g]\n", p_eL2, p_eL2/nvis); init_p_eL2=p_eL2; if(!SBA_FINITE(p_eL2)) stop=7; for(itno=0; itno<itmax && !stop; ++itno){ /* Note that p, e and ||e||_2 have been updated at the previous iteration */ /* compute derivative submatrices A_ij, B_ij */ (*fjac)(p, &idxij, rcidxs, rcsubs, jac, jac_adata); ++njev; if(covx!=NULL){ /* compute w_x_ij A_ij and w_x_ij B_ij. * Since w_x_ij is upper triangular, the products can be safely saved * directly in A_ij, B_ij, without the need for intermediate storage */ for(i=0; i<nvis; ++i){ /* set ptr1, ptr2, ptr3 to point to w_x_ij, A_ij, B_ij, resp. */ ptr1=wght + i*covsz; ptr2=jac + i*ABsz; ptr3=ptr2 + Asz; // ptr3=jac + i*ABsz + Asz; /* w_x_ij is mnp x mnp, A_ij is mnp x cnp, B_ij is mnp x pnp * NOTE: Jamming the outter (i.e., ii) loops did not run faster! */ /* A_ij */ for(ii=0; ii<mnp; ++ii) for(jj=0; jj<cnp; ++jj){ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular sum+=ptr1[ii*mnp+k]*ptr2[k*cnp+jj]; ptr2[ii*cnp+jj]=sum; } /* B_ij */ for(ii=0; ii<mnp; ++ii) for(jj=0; jj<pnp; ++jj){ for(k=ii, sum=0.0; k<mnp; ++k) // k>=ii since w_x_ij is upper triangular sum+=ptr1[ii*mnp+k]*ptr3[k*pnp+jj]; ptr3[ii*pnp+jj]=sum; } } } /* compute U_j = \sum_i A_ij^T A_ij */ // \Sigma here! /* U_j is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that A_ij is mnp x cnp */ /* Also compute ea_j = \sum_i A_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(U, m*Usz); /* clear all U_j */ _dblzero(ea, m*easz); /* clear all ea_j */ for(j=mcon; j<m; ++j){ ptr1=U + j*Usz; // set ptr1 to point to U_j ptr2=ea + j*easz; // set ptr2 to point to ea_j nnz=sba_crsm_col_elmidxs(&idxij, j, rcidxs, rcsubs); /* find nonzero A_ij, i=0...n-1 */ for(i=0; i<nnz; ++i){ /* set ptr3 to point to A_ij, actual row number in rcsubs[i] */ ptr3=jac + idxij.val[rcidxs[i]]*ABsz; /* compute the UPPER TRIANGULAR PART of A_ij^T A_ij and add it to U_j */ for(ii=0; ii<cnp; ++ii){ for(jj=ii; jj<cnp; ++jj){ for(k=0, sum=0.0; k<mnp; ++k) sum+=ptr3[k*cnp+ii]*ptr3[k*cnp+jj]; ptr1[ii*cnp+jj]+=sum; } /* copy the LOWER TRIANGULAR PART of U_j from the upper one */ for(jj=0; jj<ii; ++jj) ptr1[ii*cnp+jj]=ptr1[jj*cnp+ii]; } ptr4=e + idxij.val[rcidxs[i]]*esz; /* set ptr4 to point to e_ij */ /* compute A_ij^T e_ij and add it to ea_j */ for(ii=0; ii<cnp; ++ii){ for(jj=0, sum=0.0; jj<mnp; ++jj) sum+=ptr3[jj*cnp+ii]*ptr4[jj]; ptr2[ii]+=sum; } } } /* compute V_i = \sum_j B_ij^T B_ij */ // \Sigma here! /* V_i is symmetric, therefore its computation can be sped up by * computing only the upper part and then reusing it for the lower one. * Recall that B_ij is mnp x pnp */ /* Also compute eb_i = \sum_j B_ij^T e_ij */ // \Sigma here! /* Recall that e_ij is mnp x 1 */ _dblzero(V, n*Vsz); /* clear all V_i */ _dblzero(eb, n*ebsz); /* clear all eb_i */ for(i=0; i<n; ++i){ ptr1=V + i*Vsz; // set ptr1 to point to V_i ptr2=eb + i*ebsz; // set ptr2 to point to eb_i nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero B_ij, j=0...m-1 */ for(j=0; j<nnz; ++j){ /* set ptr3 to point to B_ij, actual column number in rcsubs[j] */ ptr3=jac + idxij.val[rcidxs[j]]*ABsz + Asz; /* compute the UPPER TRIANGULAR PART of B_ij^T B_ij and add it to V_i */ for(ii=0; ii<pnp; ++ii){ for(jj=ii; jj<pnp; ++jj){ for(k=0, sum=0.0; k<mnp; ++k) sum+=ptr3[k*pnp+ii]*ptr3[k*pnp+jj]; ptr1[ii*pnp+jj]+=sum; } } ptr4=e + idxij.val[rcidxs[j]]*esz; /* set ptr4 to point to e_ij */ /* compute B_ij^T e_ij and add it to eb_i */ for(ii=0; ii<pnp; ++ii){ for(jj=0, sum=0.0; jj<mnp; ++jj) sum+=ptr3[jj*pnp+ii]*ptr4[jj]; ptr2[ii]+=sum; } } } /* compute W_ij = A_ij^T B_ij */ // \Sigma here! /* Recall that A_ij is mnp x cnp and B_ij is mnp x pnp */ for(i=0; i<n; ++i){ nnz=sba_crsm_row_elmidxs(&idxij, i, rcidxs, rcsubs); /* find nonzero W_ij, j=0...m-1 */ for(j=0; j<nnz; ++j){ /* set ptr1 to point to W_ij, actual column number in rcsubs[j] */ ptr1=W + idxij.val[rcidxs[j]]*Wsz; if(rcsubs[j]<mcon){ /* A_ij is zero */ _dblzero(ptr1, Wsz); /* clear W_ij */ continue; } /* set ptr2 & ptr3 to point to A_ij & B_ij resp. */ ptr2=jac + idxij.val[rcidxs[j]]*ABsz; ptr3=ptr2 + Asz; /* compute A_ij^T B_ij and store it in W_ij * Recall that storage for A_ij, B_ij does not overlap with that for W_ij, * see the comments related to the initialization of jac above */ /* assert(ptr2-ptr1>=Wsz); */ for(ii=0; ii<cnp; ++ii) for(jj=0; jj<pnp; ++jj){ for(k=0, sum=0.0; k<mnp; ++k) sum+=ptr2[k*cnp+ii]*ptr3[k*pnp+jj]; ptr1[ii*pnp+jj]=sum; } } } /* Compute ||J^T e||_inf and ||p||^2 */ for(i=0, p_L2=eab_inf=0.0; i<nvars; ++i){ if(eab_inf < (tmp=FABS(eab[i]))) eab_inf=tmp; p_L2+=p[i]*p[i]; } //p_L2=sqrt(p_L2); /* save diagonal entries so that augmentation can be later canceled. * Diagonal entries are in U_j and V_i */ for(j=mcon; j<m; ++j){ ptr1=U + j*Usz; // set ptr1 to point to U_j ptr2=diagU + j*cnp; // set ptr2 to point to diagU_j for(i=0; i<cnp; ++i) ptr2[i]=ptr1[i*cnp+i]; } for(i=0; i<n; ++i){ ptr1=V + i*Vsz; // set ptr1 to point to V_i ptr2=diagV + i*pnp; // set ptr2 to point to diagV_i for(j=0; j<pnp; ++j) ptr2[j]=ptr1[j*pnp+j];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -