📄 sift.cpp
字号:
// file:        sift.cpp
// author:      Andrea Vedaldi
// description: Sift definition
// AUTORIGHTS
// Copyright (c) 2006 The Regents of the University of California
// All Rights Reserved.
//
// Created by Andrea Vedaldi (UCLA VisionLab)
//
// Permission to use, copy, modify, and distribute this software and its
// documentation for educational, research and non-profit purposes,
// without fee, and without a written agreement is hereby granted,
// provided that the above copyright notice, this paragraph and the
// following three paragraphs appear in all copies.
//
// This software program and documentation are copyrighted by The Regents
// of the University of California. The software program and
// documentation are supplied "as is", without any accompanying services
// from The Regents. The Regents does not warrant that the operation of
// the program will be uninterrupted or error-free. The end-user
// understands that the program was developed for research purposes and
// is advised not to rely exclusively on the program for any reason.
//
// This software embodies a method for which the following patent has
// been issued: "Method and apparatus for identifying scale invariant
// features in an image and use of same for locating an object in an
// image," David G. Lowe, US Patent 6,711,293 (March 23,
// 2004). Provisional application filed March 8, 1999. Asignee: The
// University of British Columbia.
//
// IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
// FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES,
// INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND
// ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN
// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF
// CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE.
THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"// BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE// MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS./** @mainpage Scale Invariant Feature Transform ** ** The algorithm is implemented by the class VL::Sift. **/#include<sift.hpp>#include<sift-conv.tpp>#include<algorithm>#include<iostream>#include<sstream>using namespace VL ;// on startup, pre compute expn(x) = exp(-x)namespace VL { namespace Detail {int const expnTableSize = 256 ;VL::float_t const expnTableMax = VL::float_t(25.0) ;VL::float_t expnTable [ expnTableSize + 1 ] ;struct buildExpnTable{ buildExpnTable() { for(int k = 0 ; k < expnTableSize + 1 ; ++k) { expnTable[k] = exp( - VL::float_t(k) / expnTableSize * expnTableMax ) ; } }} _buildExpnTable ;} }namespace VL {namespace Detail {/** Comment eater istream manipulator */class _cmnt {} cmnt ;/** @brief Extract a comment from a stream ** ** The function extracts a block of consecutive comments from an ** input stream. A comment is a sequence of whitespaces, followed by ** a `#' character and terminated at the next line ending. A block of ** comments is just a sequence of comments. **/std::istream& operator>>(std::istream& is, _cmnt& manip){ char c ; char b [1024] ; is>>c ; if( c != '#' ) return is.putback(c) ; is.getline(b,1024) ; return is ;}}/** @brief Insert PGM file into stream ** ** The function iserts into the stream @a os the grayscale image @a ** im encoded as a PGM file. The immage is assumed to be normalized ** in the range 0.0 - 1.0. ** ** @param os output stream. ** @param im pointer to image data. ** @param width image width. ** @param height image height. ** @return the stream @a os. 
**/std::ostream& insertPgm(std::ostream& os, pixel_t const* im, int width, int height){ os<< "P5" << "\n" << width << " " << height << "\n" << "255" << "\n" ; for(int y = 0 ; y < height ; ++y) { for(int x = 0 ; x < width ; ++x) { unsigned char v = (unsigned char) (std::max(std::min(*im++, 1.0f),0.f) * 255.0f) ; os << v ; } } return os ;}/** @brief Extract PGM file from stream. ** ** The function extracts from the stream @a in a grayscale image ** encoded as a PGM file. The function fills the structure @a buffer, ** containing the image dimensions and a pointer to the image data. ** ** The image data is an array of floats and is owned by the caller, ** which should erase it as in ** ** @code ** delete [] buffer.data. ** @endcode ** ** When the function encouters an error it throws a generic instance ** of VL::Exception. ** ** @param in input stream. ** @param buffer buffer descriptor to be filled. ** @return the stream @a in. **/std::istream& extractPgm(std::istream& in, PgmBuffer& buffer){ pixel_t* im_pt ; int width ; int height ; int maxval ; char c ; in>>c ; if( c != 'P') VL_THROW("File is not in PGM format") ; bool is_ascii ; in>>c ; switch( c ) { case '2' : is_ascii = true ; break ; case '5' : is_ascii = false ; break ; default : VL_THROW("File is not in PGM format") ; } in >> Detail::cmnt >> width >> Detail::cmnt >> height >> Detail::cmnt >> maxval ; // after maxval no more comments, just a whitespace or newline {char trash ; in.get(trash) ;} if(maxval > 255) VL_THROW("Only <= 8-bit per channel PGM files are supported") ; if(! in.good()) VL_THROW("PGM header parsing error") ; im_pt = new pixel_t [ width*height ]; try { if( is_ascii ) { pixel_t* start = im_pt ; pixel_t* end = start + width*height ; pixel_t norm = pixel_t( maxval ) ; while( start != end ) { int i ; in >> i ; if( ! 
in.good() ) VL_THROW ("PGM parsing error file (width="<<width <<" height="<<height <<" maxval="<<maxval <<" at pixel="<<start-im_pt<<")") ; *start++ = pixel_t( i ) / norm ; } } else { std::streampos beg = in.tellg() ; char* buffer = new char [width*height] ; in.read(buffer, width*height) ; if( ! in.good() ) VL_THROW ("PGM parsing error file (width="<<width <<" height="<<height <<" maxval="<<maxval <<" at pixel="<<in.tellg()-beg<<")") ; pixel_t* start = im_pt ; pixel_t* end = start + width*height ; uint8_t* src = reinterpret_cast<uint8_t*>(buffer) ; while( start != end ) *start++ = *src++ / 255.0f ; } } catch(...) { delete [] im_pt ; throw ; } buffer.width = width ; buffer.height = height ; buffer.data = im_pt ; return in ;}// ===================================================================// Low level image ops// -------------------------------------------------------------------namespace Detail {/** @brief Copy an image ** @param dst output imgage buffer. ** @param src input image buffer. ** @param width input image width. ** @param height input image height. **/voidcopy(pixel_t* dst, pixel_t const* src, int width, int height){ memcpy(dst, src, sizeof(pixel_t)*width*height) ;}/** @brief Copy an image upsanmpling two times ** ** The destination buffer must be at least as big as two times ** the input buffer. Bilinear interpolation is used. ** ** @param dst output imgage buffer. ** @param src input image buffer. ** @param width input image width. ** @param height input image height. **/void copyAndUpsampleRows(pixel_t* dst, pixel_t const* src, int width, int height){ for(int y = 0 ; y < height ; ++y) { pixel_t b, a ; b = a = *src++ ; for(int x = 0 ; x < width-1 ; ++x) { b = *src++ ; *dst = a ; dst += height ; *dst = 0.5*(a+b) ; dst += height ; a = b ; } *dst = b ; dst += height ; *dst = b ; dst += height ; dst += 1 - width * 2 * height ; } }/** @brief Copy an image and downsample ** ** The image is downsampled @a d times, i.e. 
reduced to @c 1/2^d of ** its original size. The parameters @a width and @a height are the ** size of the input image. The destination image is assumed to be @c ** floor(width/2^d) pixels wide and @c floor(height/2^d) pixels high. ** ** @param dst output imgage buffer. ** @param src input image buffer. ** @param width input image width. ** @param height input image height. ** @param d downsampling factor. **/void copyAndDownsample(pixel_t* dst, pixel_t const* src, int width, int height, int d){ for(int y = 0 ; y < height ; y+=d) { pixel_t const * srcrowp = src + y * width ; for(int x = 0 ; x < width - (d-1) ; x+=d) { *dst++ = *srcrowp ; srcrowp += d ; } }}}/** @brief Smooth an image ** ** The function convolves the image @a src by a Gaussian kernel of ** variance @a s and writes the result to @a dst. The function also ** needs a scratch buffer @a dst of the same size of @a src and @a ** dst. ** ** @param dst output image buffer. ** @param temp scratch image buffer. ** @param src input image buffer. ** @param width width of the buffers. ** @param height height of the buffers. ** @param s standard deviation of the Gaussian kernel. **/voidSift::smooth(pixel_t* dst, pixel_t* temp, pixel_t const* src, int width, int height, VL::float_t s){ // make sure a buffer larege enough has been allocated // to hold the filter int W = int( ceil( VL::float_t(4.0) * s ) ) ; if( ! 
filter ) { filterReserved = 0 ; } if( filterReserved < W ) { filterReserved = W ; if( filter ) delete [] filter ; filter = new pixel_t [ 2* filterReserved + 1 ] ; } // pre-compute filter for(int j = 0 ; j < 2*W+1 ; ++j) filter[j] = VL::pixel_t (std::exp (VL::float_t (-0.5 * (j-W) * (j-W) / (s*s) ))) ; // normalize to one normalize(filter, W) ; // convolve econvolve(temp, src, width, height, filter, W) ; econvolve(dst, temp, height, width, filter, W) ;}// ===================================================================// Sift(), ~Sift()// -------------------------------------------------------------------/** @brief Initialize Gaussian scale space parameters ** ** @param _im_pt Source image data ** @param _width Soruce image width ** @param _height Soruce image height ** @param _sigman Nominal smoothing value of the input image. ** @param _sigma0 Base smoothing level. ** @param _O Number of octaves. ** @param _S Number of levels per octave. ** @param _omin First octave. ** @param _smin First level in each octave. ** @param _smax Last level in each octave. **/Sift::Sift(const pixel_t* _im_pt, int _width, int _height, VL::float_t _sigman, VL::float_t _sigma0, int _O, int _S, int _omin, int _smin, int _smax) : sigman( _sigman ), sigma0( _sigma0 ), O( _O ), S( _S ), omin( _omin ), smin( _smin ), smax( _smax ), temp( NULL ), octaves( NULL ), filter( NULL ) { process(_im_pt, _width, _height) ;}/** @brief Destroy SIFT filter. **/Sift::~Sift(){ freeBuffers() ;}/** Allocate buffers. Buffer sizes depend on the image size and the ** value of omin. **/voidSift::prepareBuffers(){ // compute buffer size int w = (omin >= 0) ? (width >> omin) : (width << -omin) ; int h = (omin >= 0) ? 
(height >> omin) : (height << -omin) ; int size = w*h* std::max ((smax - smin), 2*((smax+1) - (smin-2) +1)) ; if( temp && tempReserved == size ) return ; freeBuffers() ; // allocate temp = new pixel_t [ size ] ; tempReserved = size ; tempIsGrad = false ; tempOctave = 0 ; octaves = new pixel_t* [ O ] ; for(int o = 0 ; o < O ; ++o) { octaves[o] = new pixel_t [ (smax - smin + 1) * w * h ] ; w >>= 1 ; h >>= 1 ; }} /** @brief Free buffers. ** ** This function releases any buffer allocated by prepareBuffers(). ** ** @sa prepareBuffers(). **/voidSift::freeBuffers(){ if( filter ) { delete [] filter ; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -