📄 pyramidgl.cpp
字号:
__m128 ps = _mm_loadu_ps(p);
_mm_storeu_ps(p, _mm_mul_ps(ps, r));
}
}
#endif
// Normalizes, in place, `num` consecutive descriptors of 128 floats each
// starting at `pd`.
// Scalar build: every descriptor is scaled by 1/sqrt(sum of squares), each
// component is capped at 0.2, and the capped vector is scaled by
// 1/sqrt(sum of squares) a second time.
// SSE build (USE_SSE_FOR_SIFTGPU): the same three steps are delegated to the
// 128-d helper routines defined elsewhere in this file.
inline void PyramidGL::NormalizeDescriptor(int num, float*pd)
{
#ifdef USE_SSE_FOR_SIFTGPU
	float* desc = pd;
	for(int n = 0; n < num; ++n)
	{
		//normalize and truncate to .2
		float len2 = dotproduct_128d(desc);
		float inv_len = 1.0f / sqrtf(len2);
		multiply_and_truncate_128d(desc, inv_len);

		//renormalize
		len2 = dotproduct_128d(desc);
		inv_len = 1.0f / sqrtf(len2);
		multiply_128d(desc, inv_len);

		desc += 128;
	}
#else
	//descriptor normalization runs on cpu for OpenGL implementations
	float* desc = pd;
	for(int n = 0; n < num; ++n)
	{
		int c;

		//first pass: squared length, then scale and cap every component at .2
		float len2 = 0;
		for(c = 0; c < 128; ++c)
		{
			len2 += desc[c] * desc[c];
		}
		float inv_len = 1.0f / sqrtf(len2);
		for(c = 0; c < 128; ++c)
		{
			desc[c] = min(desc[c] * inv_len, 0.2f);
		}

		//second pass: squared length of the capped vector, then rescale
		len2 = 0;
		for(c = 0; c < 128; ++c)
		{
			len2 += desc[c] * desc[c];
		}
		inv_len = 1.0f / sqrtf(len2);
		for(c = 0; c < 128; ++c)
		{
			desc[c] = desc[c] * inv_len;
		}

		desc += 128;
	}
#endif
}
// Interlaces the float data of two read-back textures into 128-float
// descriptors.
//   w, h : descriptors per row and number of descriptor rows; w * h
//          descriptors of 128 floats are written to pd
//   buf  : source floats; the first texture starts at buf, the second at
//          buf + step
//   pd   : destination buffer
//   step : distance, in floats, between the two textures inside buf
// Each texture is consumed sequentially, four floats at a time. Within every
// group of eight destination floats, texture 0 supplies floats 0..3 and
// texture 1 supplies floats 4..7. For each row the source first provides the
// lower 64 floats of all w descriptors, then the upper 64 floats of all w
// descriptors.
// Only GlobalUtil::_DescriptorPPR == 8 is implemented; for every other value
// (4 included) the function writes nothing.
inline void PyramidGL::InterlaceDescriptorF2(int w, int h, float* buf, float* pd, int step)
{
	if(GlobalUtil::_DescriptorPPR != 8) return;

	for(int tex = 0; tex < 2; ++tex)
	{
		const float* src = buf + tex * step;
		float* row_start = pd + tex * 4;

		for(int row = 0; row < h; ++row)
		{
			//half 0: floats [0, 64) of each descriptor, half 1: floats [64, 128)
			for(int half = 0; half < 2; ++half)
			{
				for(int col = 0; col < w; ++col)
				{
					float* dst = row_start + col * 128 + half * 64;
					for(int group = 0; group < 8; ++group)
					{
						dst[0] = src[0];
						dst[1] = src[1];
						dst[2] = src[2];
						dst[3] = src[3];
						dst += 8;
						src += 4;
					}
				}
			}
			//advance to the first descriptor of the next row
			row_start += 128 * w;
		}
	}
}
// Computes the descriptors of the features of every pyramid level and stores
// them, 128 floats per feature, in _descriptor_buffer.
// For each level that has features: the descriptor shader is drawn into ndf
// descriptor textures, the textures are read back into a CPU buffer, the
// read-back data is interlaced into 128-float descriptors, and the descriptors
// are normalized when GlobalUtil::_NormalizedSIFT is set.
// When _keypoint_index is non-empty the descriptors are gathered in a
// temporary buffer first and then copied to the positions listed in
// _keypoint_index.
void PyramidGL::GetFeatureDescriptors()
{
//descriptors...
float sigma;
int idx, i, j, k, w, h;
int ndf = 32 / GlobalUtil::_DescriptorPPT; //number of textures
// size of the texel block used per feature: block_width x block_height
int block_width = GlobalUtil::_DescriptorPPR;
int block_height = GlobalUtil::_DescriptorPPT/GlobalUtil::_DescriptorPPR;
// NOTE(review): &_descriptor_buffer[0] assumes the buffer is not empty - confirm against callers
float* pd = &_descriptor_buffer[0], * pbuf = NULL;
vector<float>read_buffer, descriptor_buffer2;
//use another buffer, if we need to re-order the descriptors
if(_keypoint_index.size() > 0)
{
descriptor_buffer2.resize(_descriptor_buffer.size());
pd = &descriptor_buffer2[0];
}
// NOTE(review): fbo is declared before the draw/read buffer calls below;
// presumably its constructor makes a framebuffer object current - confirm
FrameBufferObject fbo;
GLTexImage * gtex, *otex, * ftex;
GLenum buffers[8] = {
GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT ,
GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT ,
GL_COLOR_ATTACHMENT4_EXT, GL_COLOR_ATTACHMENT5_EXT ,
GL_COLOR_ATTACHMENT6_EXT, GL_COLOR_ATTACHMENT7_EXT ,
};
// draw into the first ndf color attachments at once
glDrawBuffers(ndf, buffers);
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);
// idx counts levels across all octaves; ftex walks the feature textures in the same order
for( i = 0, idx = 0, ftex = _featureTex; i < _octave_num; i++)
{
// gradient and orientation textures of this octave, starting at the gradient level offset
gtex = GetBaseLevel(i + _octave_min, DATA_GRAD) + GlobalUtil::_GradientLevelOffset;
otex = GetBaseLevel(i + _octave_min, DATA_ROT) + GlobalUtil::_GradientLevelOffset;
for( j = 0; j < param._dog_level_num; j++, ftex++, idx++, gtex++, otex++)
{
// nothing to do for a level without features
if(_levelFeatureNum[idx]==0)continue;
sigma = param.GetLevelSigma(j+param._level_min+1);
// width in texels needed by all features of this level when laid out in one row
int count = _levelFeatureNum[idx] * block_width;
GetAlignedStorageSize(count, block_width, w, h);
// h is recomputed here: number of rows needed for count texels at width w, times the block height
h = ((int)ceil(double(count) / w)) * block_height;
//not enough space for holding the descriptor data
if(w > _descriptorTex[0].GetTexWidth() || h > _descriptorTex[0].GetTexHeight())
{
for(k = 0; k < ndf; k++)_descriptorTex[k].InitTexture(w, h);
}
// descriptor texture k becomes color attachment k
for(k = 0; k < ndf; k++) _descriptorTex[k].AttachToFBO(k);
GlobalUtil::FitViewPort(w, h);
// inputs: feature list on unit 0, gradient on unit 1, orientation on unit 2
glActiveTexture(GL_TEXTURE0);
ftex->BindTex();
glActiveTexture(GL_TEXTURE1);
gtex->BindTex();
// the orientation texture gets its own unit only when it is a different texture object
if(otex!=gtex)
{
glActiveTexture(GL_TEXTURE2);
otex->BindTex();
}
ShaderMan::UseShaderDescriptor(gtex->GetTexID(), otex->GetTexID(),
w, ftex->GetImgWidth(), gtex->GetImgWidth(), gtex->GetImgHeight(), sigma);
GLTexImage::DrawQuad(0, (float)w, 0, (float)h);
//read back float format descriptors and do normalization on CPU
// step: number of floats read back per texture (w * h RGBA texels)
int step = w*h*4;
// the read-back buffer only ever grows
if((unsigned int)step*ndf > read_buffer.size())
{
read_buffer.resize(ndf*step);
}
pbuf = &read_buffer[0];
//read back
// texture k lands at read_buffer[k * step]
for(k = 0; k < ndf; k++, pbuf+=step)
{
glReadBuffer(GL_COLOR_ATTACHMENT0_EXT + k);
glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, pbuf);
}
//the following two steps run on cpu, so better cpu better speed
//and release version can be a lot faster than debug version
//interlace data on the two texture to get the descriptor
// NOTE(review): this writes (w / block_width) * (h / block_height) descriptors, which can
// exceed _levelFeatureNum[idx]; presumably the destination buffer is padded - confirm
InterlaceDescriptorF2(w / block_width, h / block_height, &read_buffer[0], pd, step);
//need to do normalization
//the new version uses SSE to speed up this part
if(GlobalUtil::_NormalizedSIFT) NormalizeDescriptor(_levelFeatureNum[idx], pd);
// advance past the descriptors of this level
pd += 128*_levelFeatureNum[idx];
glReadBuffer(GL_NONE);
}
}
//finally, put the descriptor back to their original order for existing keypoint list.
if(_keypoint_index.size() > 0)
{
// the i-th computed descriptor belongs to slot _keypoint_index[i]
for(i = 0; i < _featureNum; ++i)
{
int index = _keypoint_index[i];
memcpy(&_descriptor_buffer[index*128], &descriptor_buffer2[i*128], 128 * sizeof(float));
}
}
////////////////////////
// restore GL state: unbind the three texture units, disable drawing, unload the shader
GLTexImage::UnbindMultiTex(3);
glDrawBuffer(GL_NONE);
ShaderMan::UnloadProgram();
if(GlobalUtil::_timingS)glFinish();
// detach the descriptor textures from the color attachments used above
for(i = 0; i < ndf; i++) fbo.UnattachTex(GL_COLOR_ATTACHMENT0_EXT +i);
}
void PyramidGL::DownloadKeypoints()
{
const double twopi = 2.0*3.14159265358979323846;
int idx = 0;
float * buffer = &_keypoint_buffer[0];
vector<float> keypoint_buffer2;
//use a different keypoint buffer when processing with an exisint features list
//without orientation information.
if(_keypoint_index.size() > 0)
{
keypoint_buffer2.resize(_keypoint_buffer.size());
buffer = &keypoint_buffer2[0];
}
float * p = buffer, *ps, sigma;
GLTexImage * ftex = _featureTex;
FrameBufferObject fbo;
ftex->FitRealTexViewPort();
/////////////////////
float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
if(_down_sample_factor>0) os *= float(1<<_down_sample_factor);
float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
/////////////////////
for(int i = 0; i < _octave_num; i++, os *= 2.0f)
{
for(int j = 0; j < param._dog_level_num; j++, idx++, ftex++)
{
if(_levelFeatureNum[idx]>0)
{
ftex->AttachToFBO(0);
glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(),GL_RGBA, GL_FLOAT, p);
ps = p;
for(int k = 0; k < _levelFeatureNum[idx]; k++, ps+=4)
{
ps[0] = os*(ps[0]-0.5f) + offset; //x
ps[1] = os*(ps[1]-0.5f) + offset; //y
sigma = os*ps[3];
ps[3] = (float)fmod(twopi-ps[2], twopi); //orientation, mirrored
ps[2] = sigma; //scale
}
p+= 4* _levelFeatureNum[idx];
}
}
}
//put the feature into their original order
if(_keypoint_index.size() > 0)
{
for(int i = 0; i < _featureNum; ++i)
{
int index = _keypoint_index[i];
memcpy(&_keypoint_buffer[index*4], &keypoint_buffer2[i*4], 4 * sizeof(float));
}
}
}
// Builds the per-level feature-list textures from the keypoints already held
// in _keypoint_buffer (4 floats per keypoint; [2] is compared against the level
// scale bounds, [0]/[1]/[3] are converted back to texture coordinates and
// mirrored orientation).
//
// A keypoint goes to the level whose interval [sigma_min, sigma_max) contains
// its scale. Scales below the interval of the very first level go to the first
// level, and scales at or above the upper bound of the very last level go to
// the last level.
//
// Side effects: _keypoint_index is rebuilt (it records, in upload order, the
// original index of every assigned keypoint), _levelFeatureNum[idx] is set for
// every level, and the feature texture of each non-empty level is uploaded.
void PyramidGL::GenerateFeatureListTex()
{
	//generate feature list texture from existing keypoints
	//do feature sorting in the same time?
	FrameBufferObject fbo;
	vector<float> list;
	int idx = 0;
	const double twopi = 2.0*3.14159265358979323846;
	//ratio between a level's nominal sigma and the bounds of its interval
	float sigma_half_step = powf(2.0f, 0.5f / param._dog_level_num);
	float octave_sigma = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
	float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
	if(_down_sample_factor>0) octave_sigma *= float(1<<_down_sample_factor);

	_keypoint_index.resize(0); // should already be 0

	for(int i = 0; i < _octave_num; i++, octave_sigma*= 2.0f)
	{
		for(int j = 0; j < param._dog_level_num; j++, idx++)
		{
			list.resize(0);
			float level_sigma = param.GetLevelSigma(j + param._level_min + 1) * octave_sigma;
			float sigma_min = level_sigma / sigma_half_step;
			float sigma_max = level_sigma * sigma_half_step;
			const bool first_level = (i == 0 && j == 0);
			const bool last_level = (i == _octave_num - 1 && j == param._dog_level_num - 1);
			int fcount = 0 ;
			for(int k = 0; k < _featureNum; k++)
			{
				float * key = &_keypoint_buffer[k*4];
				const bool in_range = (key[2] >= sigma_min && key[2] < sigma_max);
				const bool below_all = (first_level && key[2] < sigma_min);
				//>= (not >): the in-range test excludes sigma_max itself, so a
				//scale exactly equal to the top bound would otherwise match no
				//level and leave _keypoint_index shorter than _featureNum
				const bool above_all = (last_level && key[2] >= sigma_max);
				if(!(in_range || below_all || above_all)) continue;

				//add this keypoint to the list
				list.push_back((key[0] - offset) / octave_sigma + 0.5f);
				list.push_back((key[1] - offset) / octave_sigma + 0.5f);
				list.push_back((float)fmod(twopi-key[3], twopi));
				list.push_back(key[2] / octave_sigma);
				fcount ++;
				//save the index of keypoints
				_keypoint_index.push_back(k);
			}

			_levelFeatureNum[idx] = fcount;
			if(fcount==0)continue;

			GLTexImage * ftex = _featureTex+idx;
			SetLevelFeatureNum(idx, fcount);

			//size the list to the full texture before uploading it
			int fw = ftex->GetImgWidth();
			int fh = ftex->GetImgHeight();
			list.resize(4*fh*fw);

			ftex->BindTex();
			ftex->AttachToFBO(0);
			glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
		}
	}

	GLTexImage::UnbindTex();
	if(GlobalUtil::_verbose)
	{
		std::cout<<"#Features:\t"<<_featureNum<<"\n";
	}
}
// Constructs the packed pyramid: forwards the SIFT parameters to the PyramidGL
// base and starts with no pyramid data (_allPyramid is NULL).
PyramidPacked::PyramidPacked(SiftParam& sp): PyramidGL(sp)
{
_allPyramid = NULL;
}
// Destructor: calls DestroyPyramidData() when the object goes away.
PyramidPacked::~PyramidPacked()
{
DestroyPyramidData();
}
//build the gaussian pyramid
// The octaves _octave_min .. _octave_min + _octave_num - 1 are processed in
// order. The base level of the first octave is resampled from `input`; the
// base level of each later octave is downsampled from level param._level_ds of
// the previous octave. The remaining levels, up to param._level_max, are
// produced by running the ShaderMan::f_gaussian_step filters one after another,
// each on the level before it.
// USE_TIMING, OCTAVE_START, LEVEL_FINISH and OCTAVE_FINISH are macros defined
// outside this function.
// NOTE(review): those macros may refer to locals of this function by name -
// confirm before renaming any of them.
void PyramidPacked::BuildPyramid(GLTexInput * input)
{
//
USE_TIMING();
int i, j;
GLTexImage * tex, *tmp;
FilterProgram ** filter;
FrameBufferObject fbo;
glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
input->FitTexViewPort();
for ( i = _octave_min; i < _octave_min + _octave_num; i++)
{
tex = GetBaseLevel(i);
tmp = GetBaseLevel(i, DATA_DOG) + 2; //use this as a temporary texture
// j is the first level still to be filtered; filter walks the step filters in parallel with it
j = param._level_min + 1;
filter = ShaderMan::f_gaussian_step;
OCTAVE_START();
if( i == _octave_min )
{
// first octave: take the base level from the input image
if(i < 0)
{
TextureUpSample(tex, input, 1<<(-i-1));
}else
{
//image might have been already down-sampled by cpu code
// NOTE(review): the factor is 1<<(i+1), i.e. 2 for octave 0; presumably this reflects the packed texture layout - confirm
TextureDownSample(tex, input, 1<<(i+1));
}
//
// optional initial filter; the base level is passed as the first two arguments and tmp as the third
if(ShaderMan::f_gaussian_skip0)
{
ShaderMan::f_gaussian_skip0->RunFilter(tex, tex, tmp);
}
//tex->FillMargin(0, 1);
LEVEL_FINISH();
}else
{
// later octaves: start from level param._level_ds of the previous octave
TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds));
LEVEL_FINISH();
if(ShaderMan::f_gaussian_skip1)
{
ShaderMan::f_gaussian_skip1->RunFilter(tex, tex, tmp);
//tex->FillMargin(0, 1);
LEVEL_FINISH();
}
}
// remaining levels: filter tex into tex+1, then move on to the next level and the next filter
for( ; j <= param._level_max ; j++, tex++, filter++)
{
// filtering
(*filter)->RunFilter(tex+1, tex, tmp);
//(tex+1)->FillMargin(0, 1);
LEVEL_FINISH();
}
//tex->FillMargin(1, 0);
OCTAVE_FINISH();
}
if(GlobalUtil::_timingS) glFinish();
UnloadProgram();
}
void PyramidPacked::ComputeGradient()
{
//first pass, compute dog, gradient, orientation
GLenum buffers[4] = {
GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT ,
GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT
};
int i, j;
double ts, t1;
FrameBufferObject fbo;
if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();
for(i = _octave_min; i < _octave_min + _octave_num; i++)
{
GLTexImage * gus = GetBaseLevel(i) + GlobalUtil::_GradientLevelOffset;
GLTexImage * dog = GetBaseLevel(i, DATA_DOG) + GlobalUtil::_GradientLevelOffset;
GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) + GlobalUtil::_GradientLevelOffset;
GLTexImage * rot = GetBaseLevel(i, DATA_ROT) + GlobalUtil::_GradientLevelOffset;
glDrawBuffers(3, buffers);
gus->FitTexViewPort();
//compute the gradient
for(j = 0; j < param._dog_level_num ; j++, gus++, dog++, grd++, rot++)
{
//gradient, dog, orientation
glActiveTexture(GL_TEXTURE0);
gus->BindTex();
glActiveTexture(GL_TEXTURE1);
(gus-1)->BindTex();
//output
dog->AttachToFBO(0);
grd->AttachToFBO(1);
rot->AttachToFBO(2);
ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
//compuate
dog->DrawQuadMT4();
}
}
if(GlobalUtil::_timingS)
{
glFinish();
if(GlobalUtil::_verbose)
{
t1 = CLOCK();
std::cout <<"<Gradient, DOG >\t"<<(t1-ts)<<"\n";
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -