📄 patchfinder.cc

📁 this is software for visual SLAM
💻 CC
📖 第 1 页 / 共 2 页
字号:
上一页 12
  mv2SubPixPos = mv2CoarsePos; // Start the sub-pixel search at the result of the coarse search..  mdMeanDiff = 0.0;}// Iterate inverse composition until convergence. Since it should never have // to travel more than a pixel's distance, set a max number of iterations; // if this is exceeded, consider the IC to have failed.bool PatchFinder::IterateSubPixToConvergence(KeyFrame &kf, int nMaxIts){  const double dConvLimit = 0.03;  bool bConverged = false;  int nIts;  for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++)    {      double dUpdateSquared = IterateSubPix(kf);      if(dUpdateSquared < 0) // went off edge of image	return false;      if(dUpdateSquared < dConvLimit*dConvLimit)	return true;    }  return false;}// Single iteration of inverse composition. This compares integral image positions in the // template image to floating point positions in the target keyframe. Interpolation is// bilinear, and performed manually (rather than using CVD::image_interpolate) since // this is a special case where the mixing fractions for each pixel are identical.double PatchFinder::IterateSubPix(KeyFrame &kf){  // Search level pos of patch center  Vector<2> v2Center = LevelNPos(mv2SubPixPos, mnSearchLevel);  BasicImage<byte> &im = kf.aLevels[mnSearchLevel].im;  if(!im.in_image_with_border(ir_rounded(v2Center), mnPatchSize / 2 + 1))    return -1.0;       // Negative return value indicates off edge of image     // Position of top-left corner of patch in search level  Vector<2> v2Base = v2Center - vec(mirCenter);    // I.C. JT*d accumulator  Vector<3> v3Accum;  Zero(v3Accum);    ImageRef ir;    byte* pTopLeftPixel;    // Each template pixel will be compared to an interpolated target pixel  // The target value is made using bilinear interpolation as the weighted sum  // of four target image pixels. Calculate mixing fractions:  double dX = v2Base[0]-floor(v2Base[0]); // Distances from pixel center of TL pixel  double dY = v2Base[1]-floor(v2Base[1]);  float fMixTL = (1.0 - dX) * (1.0 - dY);  float fMixTR = (dX)       * (1.0 - dY);  float fMixBL = (1.0 - dX) * (dY);  float fMixBR = (dX)       * (dY);    // Loop over template image  unsigned long nRowOffset = &kf.aLevels[mnSearchLevel].im[ImageRef(0,1)] - &kf.aLevels[mnSearchLevel].im[ImageRef(0,0)];  for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++)    {      pTopLeftPixel = &im[::ir(v2Base) + ImageRef(1,ir.y)]; // n.b. the x=1 offset, as with y      for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++)	{	  float fPixel =   // Calc target interpolated pixel	    fMixTL * pTopLeftPixel[0]          + fMixTR * pTopLeftPixel[1] + 	    fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1];	  pTopLeftPixel++;	  double dDiff = fPixel - mimTemplate[ir] + mdMeanDiff;	  v3Accum[0] += dDiff * mimJacs[ir - ImageRef(1,1)].first;	  v3Accum[1] += dDiff * mimJacs[ir - ImageRef(1,1)].second;	  v3Accum[2] += dDiff;  // Update JT*d	};    }    // All done looping over image - find JTJ^-1 * JTd:  Vector<3> v3Update = mm3HInv * v3Accum;   mv2SubPixPos -= v3Update.slice<0,2>() * LevelScale(mnSearchLevel);  mdMeanDiff -= v3Update[2];    double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>();  return dPixelUpdateSquared;}//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// ////              ZMSSDatpoint, which is SSE optimised, follows//// The top version is the SSE version for 8x8 patches. It is compiled// only if CVD_HAVE_XMMINTRIN is true, also you need to give your // compiler the appropriate flags (e.g. -march=core2 -msse3 for g++.)// The standard c++ version, which is about half as quick (not a disaster// by any means) is below.//// The 8x8 SSE version looks long because it has been unrolled, // it just does the same thing eight times. Both versions are one-pass// and need pre-calculated template sums and sum-squares.////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////#if CVD_HAVE_XMMINTRIN// Horizontal sum of uint16s stored in an XMM registerinline int SumXMM_16(__m128i &target){  unsigned short int sums_store[8];      _mm_storeu_si128((__m128i*)sums_store, target);  return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3] +    sums_store[4] + sums_store[5] + sums_store[6] + sums_store[7];}// Horizontal sum of uint32s stored in an XMM registerinline int SumXMM_32(__m128i &target){  unsigned int sums_store[4];      _mm_storeu_si128((__m128i*)sums_store, target);  return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3];}#endif// Calculate the Zero-mean SSD of the coarse patch and a target imate at a specific // point.int PatchFinder::ZMSSDAtPoint(CVD::BasicImage<CVD::byte> &im, const CVD::ImageRef &ir){  if(!im.in_image_with_border(ir, mirCenter[0]))    return mnMaxSSD + 1;    ImageRef irImgBase = ir - mirCenter;  byte *imagepointer;  byte *templatepointer;    int nImageSumSq = 0;  int nImageSum = 0;  int nCrossSum = 0;#if CVD_HAVE_XMMINTRIN  if(mnPatchSize == 8)    {      long unsigned int imagepointerincrement;      __m128i xImageAsEightBytes;      __m128i xImageAsWords;      __m128i xTemplateAsEightBytes;      __m128i xTemplateAsWords;      __m128i xZero;      __m128i xImageSums; // These sums are 8xuint16      __m128i xImageSqSums; // These sums are 4xint32      __m128i xCrossSums;   // These sums are 4xint32      __m128i xProduct;            xImageSums = _mm_setzero_si128();      xImageSqSums = _mm_setzero_si128();      xCrossSums = _mm_setzero_si128();      xZero = _mm_setzero_si128();            imagepointer = &im[irImgBase + ImageRef(0,0)];      templatepointer = &mimTemplate[ImageRef(0,0)];      imagepointerincrement = &im[irImgBase + ImageRef(0,1)] - imagepointer;            xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);      templatepointer += 16;      xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);      templatepointer += 16;      xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);      templatepointer += 16;      xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      imagepointer += imagepointerincrement;      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);      templatepointer += 16;      xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);      xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);      xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);      xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);      xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);      xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);      xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);      xCrossSums = _mm_add_epi32(xProduct, xCrossSums);      nImageSum = SumXMM_16(xImageSums);      nCrossSum = SumXMM_32(xCrossSums);      nImageSumSq = SumXMM_32(xImageSqSums);    }  else#endif     {          for(int nRow = 0; nRow < mnPatchSize; nRow++)	{	  imagepointer = &im[irImgBase + ImageRef(0,nRow)];	  templatepointer = &mimTemplate[ImageRef(0,nRow)];	  for(int nCol = 0; nCol < mnPatchSize; nCol++)	    {	      int n = imagepointer[nCol];	      nImageSum += n;	      nImageSumSq += n*n;	      nCrossSum += n * templatepointer[nCol];	    };	}    };    int SA = mnTemplateSum;  int SB = nImageSum;    int N = mnPatchSize * mnPatchSize;  return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq + mnTemplateSumSq - 2*nCrossSum);}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -