📄 patchfinder.cc
字号:
mv2SubPixPos = mv2CoarsePos; // Start the sub-pixel search at the result of the coarse search.. mdMeanDiff = 0.0;}// Iterate inverse composition until convergence. Since it should never have // to travel more than a pixel's distance, set a max number of iterations; // if this is exceeded, consider the IC to have failed.bool PatchFinder::IterateSubPixToConvergence(KeyFrame &kf, int nMaxIts){ const double dConvLimit = 0.03; bool bConverged = false; int nIts; for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++) { double dUpdateSquared = IterateSubPix(kf); if(dUpdateSquared < 0) // went off edge of image return false; if(dUpdateSquared < dConvLimit*dConvLimit) return true; } return false;}// Single iteration of inverse composition. This compares integral image positions in the // template image to floating point positions in the target keyframe. Interpolation is// bilinear, and performed manually (rather than using CVD::image_interpolate) since // this is a special case where the mixing fractions for each pixel are identical.double PatchFinder::IterateSubPix(KeyFrame &kf){ // Search level pos of patch center Vector<2> v2Center = LevelNPos(mv2SubPixPos, mnSearchLevel); BasicImage<byte> &im = kf.aLevels[mnSearchLevel].im; if(!im.in_image_with_border(ir_rounded(v2Center), mnPatchSize / 2 + 1)) return -1.0; // Negative return value indicates off edge of image // Position of top-left corner of patch in search level Vector<2> v2Base = v2Center - vec(mirCenter); // I.C. JT*d accumulator Vector<3> v3Accum; Zero(v3Accum); ImageRef ir; byte* pTopLeftPixel; // Each template pixel will be compared to an interpolated target pixel // The target value is made using bilinear interpolation as the weighted sum // of four target image pixels. Calculate mixing fractions: double dX = v2Base[0]-floor(v2Base[0]); // Distances from pixel center of TL pixel double dY = v2Base[1]-floor(v2Base[1]); float fMixTL = (1.0 - dX) * (1.0 - dY); float fMixTR = (dX) * (1.0 - dY); float fMixBL = (1.0 - dX) * (dY); float fMixBR = (dX) * (dY); // Loop over template image unsigned long nRowOffset = &kf.aLevels[mnSearchLevel].im[ImageRef(0,1)] - &kf.aLevels[mnSearchLevel].im[ImageRef(0,0)]; for(ir.y = 1; ir.y < mnPatchSize - 1; ir.y++) { pTopLeftPixel = &im[::ir(v2Base) + ImageRef(1,ir.y)]; // n.b. the x=1 offset, as with y for(ir.x = 1; ir.x < mnPatchSize - 1; ir.x++) { float fPixel = // Calc target interpolated pixel fMixTL * pTopLeftPixel[0] + fMixTR * pTopLeftPixel[1] + fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1]; pTopLeftPixel++; double dDiff = fPixel - mimTemplate[ir] + mdMeanDiff; v3Accum[0] += dDiff * mimJacs[ir - ImageRef(1,1)].first; v3Accum[1] += dDiff * mimJacs[ir - ImageRef(1,1)].second; v3Accum[2] += dDiff; // Update JT*d }; } // All done looping over image - find JTJ^-1 * JTd: Vector<3> v3Update = mm3HInv * v3Accum; mv2SubPixPos -= v3Update.slice<0,2>() * LevelScale(mnSearchLevel); mdMeanDiff -= v3Update[2]; double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>(); return dPixelUpdateSquared;}//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //// ZMSSDatpoint, which is SSE optimised, follows//// The top version is the SSE version for 8x8 patches. It is compiled// only if CVD_HAVE_XMMINTRIN is true, also you need to give your // compiler the appropriate flags (e.g. -march=core2 -msse3 for g++.)// The standard c++ version, which is about half as quick (not a disaster// by any means) is below.//// The 8x8 SSE version looks long because it has been unrolled, // it just does the same thing eight times. Both versions are one-pass// and need pre-calculated template sums and sum-squares.////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////#if CVD_HAVE_XMMINTRIN// Horizontal sum of uint16s stored in an XMM registerinline int SumXMM_16(__m128i &target){ unsigned short int sums_store[8]; _mm_storeu_si128((__m128i*)sums_store, target); return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3] + sums_store[4] + sums_store[5] + sums_store[6] + sums_store[7];}// Horizontal sum of uint32s stored in an XMM registerinline int SumXMM_32(__m128i &target){ unsigned int sums_store[4]; _mm_storeu_si128((__m128i*)sums_store, target); return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3];}#endif// Calculate the Zero-mean SSD of the coarse patch and a target imate at a specific // point.int PatchFinder::ZMSSDAtPoint(CVD::BasicImage<CVD::byte> &im, const CVD::ImageRef &ir){ if(!im.in_image_with_border(ir, mirCenter[0])) return mnMaxSSD + 1; ImageRef irImgBase = ir - mirCenter; byte *imagepointer; byte *templatepointer; int nImageSumSq = 0; int nImageSum = 0; int nCrossSum = 0;#if CVD_HAVE_XMMINTRIN if(mnPatchSize == 8) { long unsigned int imagepointerincrement; __m128i xImageAsEightBytes; __m128i xImageAsWords; __m128i xTemplateAsEightBytes; __m128i xTemplateAsWords; __m128i xZero; __m128i xImageSums; // These sums are 8xuint16 __m128i xImageSqSums; // These sums are 4xint32 __m128i xCrossSums; // These sums are 4xint32 __m128i xProduct; xImageSums = _mm_setzero_si128(); xImageSqSums = _mm_setzero_si128(); xCrossSums = _mm_setzero_si128(); xZero = _mm_setzero_si128(); imagepointer = &im[irImgBase + ImageRef(0,0)]; templatepointer = &mimTemplate[ImageRef(0,0)]; imagepointerincrement = &im[irImgBase + ImageRef(0,1)] - imagepointer; xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer); templatepointer += 16; xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer); templatepointer += 16; xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer); templatepointer += 16; xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); imagepointer += imagepointerincrement; xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer); templatepointer += 16; xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer); xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero); xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums); xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords); xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums); xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero); xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords); xCrossSums = _mm_add_epi32(xProduct, xCrossSums); nImageSum = SumXMM_16(xImageSums); nCrossSum = SumXMM_32(xCrossSums); nImageSumSq = SumXMM_32(xImageSqSums); } else#endif { for(int nRow = 0; nRow < mnPatchSize; nRow++) { imagepointer = &im[irImgBase + ImageRef(0,nRow)]; templatepointer = &mimTemplate[ImageRef(0,nRow)]; for(int nCol = 0; nCol < mnPatchSize; nCol++) { int n = imagepointer[nCol]; nImageSum += n; nImageSumSq += n*n; nCrossSum += n * templatepointer[nCol]; }; } }; int SA = mnTemplateSum; int SB = nImageSum; int N = mnPatchSize * mnPatchSize; return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq + mnTemplateSumSq - 2*nCrossSum);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -