📄 jift.cpp
字号:
/******************************************************************************* * * FILENAME: JIFT.cpp * DESCRIPTION: Jun's invariant feature transformation (JIFT) * * AUTHOR: Jun Liu (jun@cs.man.ac.uk) * INSTITUTION: Advanced Interfaces Group, * Department of Computer Science, * the University of Manchester, United Kingdom * DATE: August 2005 * *******************************************************************************/#include <vil/vil_copy.h>#include <vil/vil_memory_image.h>#include <vil/vil_convert.h>#include <vil/vil_load.h>#include <vil/vil_save.h>#include <vil/vil_bilin_interp.h>#include <vil/vil_math.h>#include <vnl/algo/vnl_matrix_inverse.h>#include "JIFT.h"#include "Gaussian.h"const double ANTIALIAS_SIGMA = 0.5;const double PREBLUR_SIGMA = 1.0;const double CURVATURE_THRESHOLD = 10.0;const double CONTRAST_THRESHOLD = 0.02;const unsigned int SQUARE_SIZE = 3;const unsigned int NUM_BINS = 36;const unsigned int FEATURE_WINDOW_SIZE = 16;const unsigned int DESC_NUM_BINS = 8;const double FV_THRESHOLD = 0.2;const unsigned int FVSIZE = 128; // the constructor JIFT::JIFT(vil_image_view<vxl_byte> const& srcimg, unsigned int octaves, unsigned int intervals) : m_octaves(octaves), m_intervals(intervals){ init(srcimg);}JIFT::JIFT(string const& filename, unsigned int octaves, unsigned int intervals) : m_octaves(octaves), m_intervals(intervals){ vil_image_view<vxl_byte> srcimg = vil_load(filename.c_str()); init(srcimg);}JIFT::JIFT(const char * filename, unsigned int octaves, unsigned int intervals) : m_octaves(octaves), m_intervals(intervals){ vil_image_view<vxl_byte> srcimg = vil_load(filename); init(srcimg);}void JIFT::init(vil_image_view<vxl_byte> const& srcimg){ unsigned int i; // copy and store the source image m_srcimg = vil_copy_deep(srcimg); // allocate spaces for gaussian list, difference of gaussian list extrema and absolute sigma m_glist = new vil_image_view<double>*[m_octaves]; for (i=0;i<m_octaves;i++) m_glist[i] = new vil_image_view<double>[m_intervals+3]; m_doglist = new vil_image_view<double>*[m_octaves]; for (i=0;i<m_octaves;i++) m_doglist[i] = new vil_image_view<double>[m_intervals+2]; m_extrema = new vil_image_view<bool> * [m_octaves]; for (i=0;i<m_octaves;i++) m_extrema[i] = new vil_image_view<bool>[m_intervals]; m_abssigmas = new double*[m_octaves]; for (i=0;i<m_octaves;i++) m_abssigmas[i] = new double[m_intervals+3]; } // the destructor JIFT::~JIFT(){ unsigned int i; for (i=0;i<m_octaves;i++) { delete [] m_glist[m_octaves-i-1]; delete [] m_doglist[m_octaves-i-1]; delete [] m_extrema[m_octaves-i-1]; delete [] m_abssigmas[m_octaves-i-1]; } delete [] m_glist; delete [] m_doglist; delete [] m_extrema; delete [] m_abssigmas;}void JIFT::buildPyramid(void){ cout << endl << "Building image pyramid..." << endl; // Generate the gaussian and the difference of gaussian pyramids // In order to detect key points on s intervals per octave, // we must generate s+3 blurred images in the gaussian pyramid. // This is because s+3 blurred images generate s+2 DoG images, and two // images are needed (one at the highest and one lowest scales of the octave) // for extrema detection unsigned int i,j,k,t; // convert to gray scale image vil_image_view<double> gray_scale_image; // if it is an rgb colour image if (this -> m_srcimg.nplanes() == 3) vil_convert_planes_to_grey(this->m_srcimg,gray_scale_image); // if this is an gray scale image else if (this->m_srcimg.nplanes() ==1) vil_convert_cast(this->m_srcimg,gray_scale_image); // vil_convert_cast(this->m_srcimg,gray_scale_image); // Blur the image with a standard deviation of 0.5 to prevent aliasing // and then upsample the image by a factor of 2 using linear interpolation // Lowe claims that this increase the number of stable keypoints by a factor // of 4 // use gaussian filter with sigma = antialias_sigma Gaussian::gaussian_filter(gray_scale_image,gray_scale_image,ANTIALIAS_SIGMA); // double the size m_glist[0][0].set_size(gray_scale_image.ni()*2,gray_scale_image.nj()*2); // bilinear interpolation, and normalise (divided by 255.0) for (i=0;i<m_glist[0][0].ni();i++) for (j=0;j<m_glist[0][0].nj();j++) m_glist[0][0](i,j) = vil_bilin_interp_safe_extend(gray_scale_image, static_cast<double>(i)/2.0, static_cast<double>(j)/2.0)/255.0; // now pre-blur the base image with a sigma of PREBLUR_SIGMA Gaussian::gaussian_filter(m_glist[0][0],m_glist[0][0],PREBLUR_SIGMA); double init_sigma = sqrt(2); // Keep track of the absolute sigma m_abssigmas[0][0] = init_sigma * 0.5; // to construct gaussian scale space for (i=0;i<m_octaves;i++) { double sigma = init_sigma; for (j=1;j<m_intervals+3;j++) { // Compute the standard deviation of the gaussian filter needed to produce // the next level of geometrically sampled pyramid. // Here sigma_(i+1) = k * sigma // By definitiojn of successive convolution, the required blurring sigma_f // to produce sigma_(i+1) from sigma_i is // // sigma_(i+1)^2 = sigma_f,i^2 + sigma_i^2 // (k*sigma_i)^2 = sigma_f,i^2 + sigma_i^2 // // Therefore // // sigma_f, i = sqrt(k^2 -1) sigma_i // // where k = 2^(1/intervals) to span the octave, so // // sigma_f, i = sqrt(2^(2/intervals)-1)sigma_i double sigma_f = sqrt(pow(2.0,2.0/m_intervals)-1) * sigma; sigma = pow(2.0,1.0/m_intervals) * sigma; // keep track of absolute sigma m_abssigmas[i][j] = sigma * 0.5 * pow(2.0f,static_cast<float>(i)); Gaussian::gaussian_filter(m_glist[i][j-1],m_glist[i][j],sigma_f); // and difference of gaussian pyramid m_doglist[i][j-1].set_size(m_glist[i][j].ni(),m_glist[i][j].nj()); vil_math_image_difference(m_glist[i][j],m_glist[i][j-1],m_doglist[i][j-1]); } if (i < m_octaves-1) { // the gaussian image 2 images from the top of the stack for this octave // have been blurred by by 2*sigma. Subsample this image by a factor of // 2 to produce the first image of the next octave m_glist[i+1][0].set_size(m_glist[i][0].ni()/2,m_glist[i][0].nj()/2); for (k=0;k<m_glist[i+1][0].ni();k++) for (t=0;t<m_glist[i+1][0].nj();t++) m_glist[i+1][0](k,t) = m_glist[i][m_intervals](2*k,2*t); m_abssigmas[i+1][0] = m_abssigmas[i][m_intervals]; } }}void JIFT::detectLocalExtrema(void){ // The next step is to detect local maxima in the DOG pyramid. When // a maximum is found, two tests are applied before labeling it as a keypoint. // First, it mush have sufficient contrast. Second, it should not be an edge point // (i.e. the ration of principal curvation at the extremum should be below a threshold) // Compute threshold for the ratio of principle curvature test // applied to DoG extrema before classifying them as keypoints unsigned int i,j,xi,yi; double curvature_ratio, curvature_threshold; vil_image_view<double> middle, up, down; int scale; double dxx, dyy, dxy, trH, detH; unsigned int num = 0; curvature_threshold = (CURVATURE_THRESHOLD+1)*(CURVATURE_THRESHOLD+1)/CURVATURE_THRESHOLD; //detect local extrema in DoG pyramid for (i=0;i<m_octaves;i++) { scale = static_cast<int>(pow(2.0f,static_cast<float>(i))); for(j=1;j<m_intervals+1;j++) { m_extrema[i][j-1].set_size(m_doglist[i][0].ni(),m_doglist[i][0].nj()); m_extrema[i][j-1].fill(false); // fetch the DoG data middle = m_doglist[i][j]; up = m_doglist[i][j+1]; down = m_doglist[i][j-1]; for (xi=1;xi<m_doglist[i][0].ni()-1;xi++) for(yi=1;yi<m_doglist[i][0].nj()-1;yi++) { // if it is the local maximum if (middle(xi,yi) > middle(xi,yi-1) && middle(xi,yi) > middle(xi,yi+1) && middle(xi,yi) > middle(xi-1,yi) && middle(xi,yi) > middle(xi+1,yi) && middle(xi,yi) > middle(xi-1,yi-1) && middle(xi,yi) > middle(xi+1,yi-1) && middle(xi,yi) > middle(xi+1,yi+1) && middle(xi,yi) > middle(xi-1,yi+1) && middle(xi,yi) > up(xi,yi) && middle(xi,yi) > up(xi,yi-1) && middle(xi,yi) > up(xi,yi+1) && middle(xi,yi) > up(xi-1,yi) && middle(xi,yi) > up(xi+1,yi) && middle(xi,yi) > up(xi-1,yi-1) && middle(xi,yi) > up(xi+1,yi-1) && middle(xi,yi) > up(xi+1,yi+1) && middle(xi,yi) > up(xi-1,yi+1) && middle(xi,yi) > down(xi,yi) && middle(xi,yi) > down(xi,yi-1) && middle(xi,yi) > down(xi,yi+1) && middle(xi,yi) > down(xi-1,yi) && middle(xi,yi) > down(xi+1,yi) && middle(xi,yi) > down(xi-1,yi-1) && middle(xi,yi) > down(xi+1,yi-1) && middle(xi,yi) > down(xi+1,yi+1) && middle(xi,yi) > down(xi-1,yi+1) ) { m_extrema[i][j-1](xi,yi) = true; num ++; } // if it is the local minumum else if (middle(xi,yi) < middle(xi,yi-1) && middle(xi,yi) < middle(xi,yi+1) && middle(xi,yi) < middle(xi-1,yi) && middle(xi,yi) < middle(xi+1,yi) && middle(xi,yi) < middle(xi-1,yi-1) && middle(xi,yi) < middle(xi+1,yi-1) && middle(xi,yi) < middle(xi+1,yi+1) && middle(xi,yi) < middle(xi-1,yi+1) && middle(xi,yi) < up(xi,yi) && middle(xi,yi) < up(xi,yi-1) && middle(xi,yi) < up(xi,yi+1) && middle(xi,yi) < up(xi-1,yi) && middle(xi,yi) < up(xi+1,yi) && middle(xi,yi) < up(xi-1,yi-1) && middle(xi,yi) < up(xi+1,yi-1) && middle(xi,yi) < up(xi+1,yi+1) && middle(xi,yi) < up(xi-1,yi+1) && middle(xi,yi) < down(xi,yi) && middle(xi,yi) < down(xi,yi-1) && middle(xi,yi) < down(xi,yi+1) && middle(xi,yi) < down(xi-1,yi) && middle(xi,yi) < down(xi+1,yi) && middle(xi,yi) < down(xi-1,yi-1) && middle(xi,yi) < down(xi+1,yi-1) && middle(xi,yi) < down(xi+1,yi+1) && middle(xi,yi) < down(xi-1,yi+1) ) { m_extrema[i][j-1](xi,yi) = true; num ++; } // if it is a local extremum, // make sure the extremum is above the contrast threshold if (m_extrema[i][j-1](xi,yi) == true && fabs(middle(xi,yi)) < CONTRAST_THRESHOLD) { m_extrema[i][j-1](xi,yi) = false; num --; } // compute the entries of the Hessian matrix at the extrema location if (m_extrema[i][j-1](xi,yi) == true) { dxx = middle(xi,yi-1) + middle(xi,yi+1) - 2.0 * middle(xi,yi); dyy = middle(xi-1,yi) + middle(xi+1,yi) - 2.0 * middle(xi,yi); dxy = (middle(xi-1,yi-1) + middle(xi+1,yi+1) -
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -