⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pyramidgl.cpp

📁 SiftGPU is an implementation of SIFT [1] for GPU. SiftGPU processes pixels in parallel to build Gaussi
💻 CPP
📖 第 1 页 / 共 5 页
字号:
			__m128 ps = _mm_loadu_ps(p);
			_mm_storeu_ps(p, _mm_mul_ps(ps, r));
		}
	}
#endif


// Normalizes `num` consecutive 128-float descriptors in place, starting at pd.
//
// For each descriptor: scale to unit L2 norm, clamp every entry to at most
// 0.2, then scale to unit L2 norm again.
//
// A descriptor whose squared norm is not strictly positive (all zeros, or
// already containing NaN) is left untouched. Without that guard the scale
// factor would be 1/sqrt(0) = inf and 0 * inf would turn every entry into NaN.
//
//   num  number of descriptors to process
//   pd   pointer to the first descriptor; descriptors are 128 floats apart
inline void PyramidGL::NormalizeDescriptor(int num, float*pd)
{

#ifdef USE_SSE_FOR_SIFTGPU
	//NOTE(review): the *_128d helpers are defined outside this function;
	//presumably they are the SSE counterparts of the scalar loops below
	for(int k = 0; k < num; k++, pd +=128)
	{
		float sq;
		//normalize and truncate to .2
		sq = dotproduct_128d(pd);
		if(!(sq > 0.0f)) continue;	//nothing to normalize, avoid inf/NaN
		sq = 1.0f / sqrtf(sq);
		multiply_and_truncate_128d(pd, sq);

		//renormalize
		sq = dotproduct_128d(pd);
		if(!(sq > 0.0f)) continue;	//truncated descriptor collapsed to zero
		sq = 1.0f / sqrtf(sq);
		multiply_128d(pd, sq);
	}
#else
	//descriptor normalization runs on cpu for OpenGL implementations
	for(int k = 0; k < num; k++, pd +=128)
	{
		int v;
		float sq = 0;

		//squared L2 norm of the raw descriptor
		for(v = 0; v < 128; v++)	sq += pd[v]*pd[v];
		if(!(sq > 0.0f)) continue;	//nothing to normalize, avoid inf/NaN
		sq = 1.0f / sqrtf(sq);

		//normalize and truncate to .2
		for(v = 0; v < 128; v++)	pd[v] = min(pd[v]*sq, 0.2f);

		//renormalize
		sq = 0;
		for(v = 0; v < 128; v++)	sq += pd[v]*pd[v];
		if(!(sq > 0.0f)) continue;	//truncated descriptor collapsed to zero
		sq = 1.0f / sqrtf(sq);

		for(v = 0; v < 128; v++)	pd[v] = pd[v]*sq;
	}

#endif
}

// Interleaves the read-back data of two descriptor textures into 128-float
// descriptors.
//
//   w, h  number of descriptor columns / rows to produce
//   buf   read-back data; the second texture starts at buf + step
//   pd    output, w*h descriptors of 128 floats each, row-major
//   step  distance in floats between the two textures inside buf
//
// Only the GlobalUtil::_DescriptorPPR == 8 layout is handled. In that layout
// each texture is consumed sequentially, 4 floats at a time: for every output
// row it first supplies the data for floats [0,64) of each descriptor in the
// row, then the data for floats [64,128). Within a descriptor half the two
// textures alternate in groups of 4 floats (texture 0 at offset 0, texture 1
// at offset 4, stride 8).
inline void PyramidGL::InterlaceDescriptorF2(int w, int h, float* buf, float* pd, int step)
{
	if(GlobalUtil::_DescriptorPPR == 8)
	{
		for(int tex = 0; tex < 2; ++tex)
		{
			//sequential read cursor of this texture
			const float* src = buf + tex * step;
			//first float written by this texture in the current output row
			float* row_base = pd + tex * 4;

			for(int row = 0; row < h; ++row, row_base += 128 * w)
			{
				for(int half = 0; half < 2; ++half)
				{
					float* dst = row_base + 64 * half;
					//8 groups advance dst by 64; the extra 64 skips the other half
					for(int col = 0; col < w; ++col, dst += 64)
					{
						for(int group = 0; group < 8; ++group, dst += 8, src += 4)
						{
							dst[0] = src[0];
							dst[1] = src[1];
							dst[2] = src[2];
							dst[3] = src[3];
						}
					}
				}
			}
		}
	}
	else if(GlobalUtil::_DescriptorPPR == 4)
	{
		//no interlacing is implemented for this layout
	}
}
// Computes the descriptors of all features and stores them in
// _descriptor_buffer, 128 floats per feature.
//
// For every pyramid level that has features, the descriptor shader is drawn
// into `ndf` descriptor textures, the textures are read back as RGBA floats,
// interlaced on the CPU by InterlaceDescriptorF2 and, when
// GlobalUtil::_NormalizedSIFT is set, normalized by NormalizeDescriptor.
// When _keypoint_index is non-empty the descriptors are first written to a
// scratch buffer and then copied to the slots given by _keypoint_index.
void PyramidGL::GetFeatureDescriptors()
{
	//descriptors...
	float sigma;
	int idx, i, j, k, w, h;
	int ndf = 32 / GlobalUtil::_DescriptorPPT; //number of textures
	int block_width = GlobalUtil::_DescriptorPPR;
	int block_height = GlobalUtil::_DescriptorPPT/GlobalUtil::_DescriptorPPR;
	//pd is the write cursor for finished descriptors, pbuf the read-back cursor
	float* pd =  &_descriptor_buffer[0], * pbuf  = NULL;
	vector<float>read_buffer, descriptor_buffer2;

	//use another buffer, if we need to re-order the descriptors
	if(_keypoint_index.size() > 0)
	{
		descriptor_buffer2.resize(_descriptor_buffer.size());
		pd = &descriptor_buffer2[0];
	}
	FrameBufferObject fbo;

	GLTexImage * gtex, *otex, * ftex;
	GLenum buffers[8] = { 
		GL_COLOR_ATTACHMENT0_EXT,		GL_COLOR_ATTACHMENT1_EXT ,
		GL_COLOR_ATTACHMENT2_EXT,		GL_COLOR_ATTACHMENT3_EXT ,
		GL_COLOR_ATTACHMENT4_EXT,		GL_COLOR_ATTACHMENT5_EXT ,
		GL_COLOR_ATTACHMENT6_EXT,		GL_COLOR_ATTACHMENT7_EXT ,
	};

	//render into the first ndf color attachments at once
	glDrawBuffers(ndf, buffers);
	glReadBuffer(GL_COLOR_ATTACHMENT0_EXT);


	//idx is the flat level index over all octaves; ftex walks _featureTex in step with it
	for( i = 0, idx = 0, ftex = _featureTex; i < _octave_num; i++)
	{
		gtex = GetBaseLevel(i + _octave_min, DATA_GRAD) + GlobalUtil::_GradientLevelOffset;
		otex = GetBaseLevel(i + _octave_min, DATA_ROT)  + GlobalUtil::_GradientLevelOffset;
		for( j = 0; j < param._dog_level_num; j++, ftex++, idx++, gtex++, otex++)
		{
			//nothing to compute for a level without features
			if(_levelFeatureNum[idx]==0)continue;

			sigma = param.GetLevelSigma(j+param._level_min+1);
			//each feature takes block_width pixels horizontally
			int count = _levelFeatureNum[idx] * block_width;
			GetAlignedStorageSize(count, block_width, w, h);
			//h is recomputed here: enough rows of width w for count pixels,
			//times block_height pixel rows per feature
			h = ((int)ceil(double(count) / w)) * block_height;

			//not enough space for holding the descriptor data
			if(w > _descriptorTex[0].GetTexWidth() || h > _descriptorTex[0].GetTexHeight())
			{
				for(k = 0; k < ndf; k++)_descriptorTex[k].InitTexture(w, h);
			}
			for(k = 0; k < ndf; k++)	_descriptorTex[k].AttachToFBO(k);
			GlobalUtil::FitViewPort(w, h);
			//inputs: feature list on unit 0, gradient on unit 1, and the
			//orientation texture on unit 2 when it is a different texture
			glActiveTexture(GL_TEXTURE0);
			ftex->BindTex();
			glActiveTexture(GL_TEXTURE1);
			gtex->BindTex();
			if(otex!=gtex)
			{
				glActiveTexture(GL_TEXTURE2);
				otex->BindTex();
			}

			ShaderMan::UseShaderDescriptor(gtex->GetTexID(), otex->GetTexID(), 
				w, ftex->GetImgWidth(), gtex->GetImgWidth(), gtex->GetImgHeight(), sigma);
			GLTexImage::DrawQuad(0, (float)w, 0, (float)h);

			 //read back float format descriptors and do normalization on CPU
			//step = floats per texture: w*h pixels with 4 (RGBA) floats each
			int step = w*h*4;
			if((unsigned int)step*ndf > read_buffer.size())
			{
				read_buffer.resize(ndf*step);
			}
			pbuf = &read_buffer[0];
			
			//read back, one attachment after another, step floats apart
			for(k = 0; k < ndf; k++, pbuf+=step)
			{
				glReadBuffer(GL_COLOR_ATTACHMENT0_EXT + k);
				glReadPixels(0, 0, w, h, GL_RGBA, GL_FLOAT, pbuf);
			}
	
			//the following two steps run on cpu, so better cpu better speed
			//and release version can be a lot faster than debug version
			//interlace data on the two texture to get the descriptor
			//(sizes are passed in units of descriptor blocks, not pixels)
			InterlaceDescriptorF2(w / block_width, h / block_height, &read_buffer[0], pd, step);
			
			//need to do normalization
			//the new version uses SSE to speed up this part
			if(GlobalUtil::_NormalizedSIFT) NormalizeDescriptor(_levelFeatureNum[idx], pd);

			//advance by the number of real features of this level
			pd += 128*_levelFeatureNum[idx];
			glReadBuffer(GL_NONE);
		}
	}


	//finally, put the descriptor back to their original order for existing keypoint list.
	if(_keypoint_index.size() > 0)
	{
		for(i = 0; i < _featureNum; ++i)
		{
			//the i-th computed descriptor belongs to keypoint _keypoint_index[i]
			int index = _keypoint_index[i];
			memcpy(&_descriptor_buffer[index*128], &descriptor_buffer2[i*128], 128 * sizeof(float));
		}
	}

	////////////////////////
	//restore GL state: textures, draw buffer, shader program, FBO attachments
	GLTexImage::UnbindMultiTex(3); 
	glDrawBuffer(GL_NONE);
	ShaderMan::UnloadProgram();
	if(GlobalUtil::_timingS)glFinish();
	for(i = 0; i < ndf; i++) fbo.UnattachTex(GL_COLOR_ATTACHMENT0_EXT +i);

}


void PyramidGL::DownloadKeypoints()
{
	const double twopi = 2.0*3.14159265358979323846;
	int idx = 0;
	float * buffer = &_keypoint_buffer[0];
	vector<float> keypoint_buffer2;
	//use a different keypoint buffer when processing with an exisint features list
	//without orientation information. 
	if(_keypoint_index.size() > 0)
	{
		keypoint_buffer2.resize(_keypoint_buffer.size());
		buffer = &keypoint_buffer2[0];
	}
	float * p = buffer, *ps, sigma;
	GLTexImage * ftex = _featureTex;
	FrameBufferObject fbo;
	ftex->FitRealTexViewPort();
	/////////////////////
	float os = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
	if(_down_sample_factor>0) os *= float(1<<_down_sample_factor); 
	float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f;
	/////////////////////
	for(int i = 0; i < _octave_num; i++, os *= 2.0f)
	{
		
		for(int j = 0; j  < param._dog_level_num; j++, idx++, ftex++)
		{

			if(_levelFeatureNum[idx]>0)
			{	
				ftex->AttachToFBO(0);
				glReadPixels(0, 0, ftex->GetImgWidth(), ftex->GetImgHeight(),GL_RGBA, GL_FLOAT, p);
				ps = p;
				for(int k = 0;  k < _levelFeatureNum[idx]; k++, ps+=4)
				{
					ps[0] = os*(ps[0]-0.5f) + offset;	//x
					ps[1] = os*(ps[1]-0.5f) + offset;	//y
					sigma = os*ps[3]; 
					ps[3] = (float)fmod(twopi-ps[2], twopi);	//orientation, mirrored
					ps[2] = sigma;  //scale
				}
				p+= 4* _levelFeatureNum[idx];
			}
		}
	}

	//put the feature into their original order

	if(_keypoint_index.size() > 0)
	{
		for(int i = 0; i < _featureNum; ++i)
		{
			int index = _keypoint_index[i];
			memcpy(&_keypoint_buffer[index*4], &keypoint_buffer2[i*4], 4 * sizeof(float));
		}
	}
}


// Generates the per-level feature-list textures from the keypoints already
// stored in _keypoint_buffer (4 floats per keypoint, read here as
// x, y, scale, orientation).
//
// Every keypoint is assigned to the level whose scale interval
// [sigma_min, sigma_max) contains its scale. Scales below the first level's
// interval go to the first level, scales at or above the last level's
// sigma_max go to the last level. The order in which keypoints are assigned
// is recorded in _keypoint_index so that results computed per level can be
// mapped back to the original keypoint order.
//
// The last-level test uses >= on purpose: with a strict > a keypoint whose
// scale equals the last sigma_max would match no level at all, and
// _keypoint_index would end up with fewer than _featureNum entries.
void PyramidGL::GenerateFeatureListTex()
{
	//generate feature list texture from existing keypoints
	//do feature sorting in the same time?

	FrameBufferObject fbo;
	vector<float> list;
	int idx = 0;
	const double twopi = 2.0*3.14159265358979323846;
	//ratio between a level's sigma and the boundary to the neighbouring level
	float sigma_half_step = powf(2.0f, 0.5f / param._dog_level_num);
	float octave_sigma = _octave_min>=0? float(1<<_octave_min): 1.0f/(1<<(-_octave_min));
	float offset = GlobalUtil::_LoweOrigin? 0 : 0.5f; 
	if(_down_sample_factor>0) octave_sigma *= float(1<<_down_sample_factor); 

	_keypoint_index.resize(0); // should already be 0
	for(int i = 0; i < _octave_num; i++, octave_sigma*= 2.0f)
	{
		for(int j = 0; j < param._dog_level_num; j++, idx++)
		{
			const bool first_level = (i == 0 && j == 0);
			const bool last_level = (i == _octave_num -1 && j == param._dog_level_num - 1);

			list.resize(0);
			float level_sigma = param.GetLevelSigma(j + param._level_min + 1) * octave_sigma;
			float sigma_min = level_sigma / sigma_half_step;
			float sigma_max = level_sigma * sigma_half_step;
			int fcount = 0 ;
			for(int k = 0; k < _featureNum; k++)
			{
				float * key = &_keypoint_buffer[k*4];
				const float scale = key[2];
				const bool in_range   = (scale >= sigma_min && scale < sigma_max);
				const bool below_all  = (first_level && scale < sigma_min);
				const bool above_all  = (last_level && scale >= sigma_max);
				if(!(in_range || below_all || above_all)) continue;

				//add this keypoint to the list, converted to the octave's
				//coordinates with the orientation mirrored
				list.push_back((key[0] - offset) / octave_sigma + 0.5f);
				list.push_back((key[1] - offset) / octave_sigma + 0.5f);
				list.push_back((float)fmod(twopi-key[3], twopi));
				list.push_back(scale / octave_sigma);
				fcount ++;
				//save the index of keypoints
				_keypoint_index.push_back(k);
			}

			_levelFeatureNum[idx] = fcount;
			if(fcount==0)continue;
			GLTexImage * ftex = _featureTex+idx;

			SetLevelFeatureNum(idx, fcount);

			int fw = ftex->GetImgWidth();
			int fh = ftex->GetImgHeight();

			//the upload below reads fw*fh RGBA texels, so pad the list to that size
			list.resize(4*fh*fw);

			ftex->BindTex();
			ftex->AttachToFBO(0);
			glTexSubImage2D(GlobalUtil::_texTarget, 0, 0, 0, fw, fh, GL_RGBA, GL_FLOAT, &list[0]);
		}
	}
	GLTexImage::UnbindTex();
	if(GlobalUtil::_verbose)
	{
		std::cout<<"#Features:\t"<<_featureNum<<"\n";
	}
}



// Forwards the SIFT parameters to the PyramidGL base class and starts with a
// null _allPyramid pointer.
PyramidPacked::PyramidPacked(SiftParam& sp)
	: PyramidGL(sp)
{
	this->_allPyramid = NULL;
}

// Releases the pyramid data through DestroyPyramidData() on destruction.
PyramidPacked::~PyramidPacked()
{
	this->DestroyPyramidData();
}


// Builds the Gaussian pyramid from the input texture.
//
// For every octave the base level is produced first: the first octave is
// resampled from `input`, every later octave is down-sampled from level
// param._level_ds of the previous octave. The remaining levels up to
// param._level_max are then produced one after another, each by running the
// next filter of ShaderMan::f_gaussian_step on the level before it.
//
// NOTE(review): USE_TIMING, OCTAVE_START, LEVEL_FINISH and OCTAVE_FINISH are
// macros defined outside this function; presumably timing instrumentation.
// They may refer to locals of this function by name, so check their
// definitions before renaming anything here.
void PyramidPacked::BuildPyramid(GLTexInput * input)
{
	//
	USE_TIMING();
	int i, j;
	GLTexImage * tex, *tmp;
	FilterProgram ** filter;
	FrameBufferObject fbo;

	glDrawBuffer(GL_COLOR_ATTACHMENT0_EXT);
	input->FitTexViewPort();

	for ( i = _octave_min; i < _octave_min + _octave_num; i++)
	{

		tex = GetBaseLevel(i);
		tmp = GetBaseLevel(i, DATA_DOG) + 2; //use this as a temporary texture

		//j counts the level being produced; filter walks the per-step filters
		j = param._level_min + 1;
		filter = ShaderMan::f_gaussian_step;

		OCTAVE_START();

		if( i == _octave_min )
		{
			//first octave: resample the input image into the base level
			if(i < 0)
			{
				TextureUpSample(tex, input, 1<<(-i-1));			
			}else
			{
				//image might have been already down-sampled by cpu code
				TextureDownSample(tex, input, 1<<(i+1));
			}
			//optional in-place filter of the base level, only when the program exists
			if(ShaderMan::f_gaussian_skip0)
			{
				ShaderMan::f_gaussian_skip0->RunFilter(tex, tex, tmp);
			}
			//tex->FillMargin(0, 1);	
			LEVEL_FINISH();
		}else
		{
			//later octaves start from level param._level_ds of the previous octave
			TextureDownSample(tex, GetLevelTexture(i-1, param._level_ds)); 
	
			LEVEL_FINISH();

			//optional in-place filter of the base level, only when the program exists
			if(ShaderMan::f_gaussian_skip1)
			{
				ShaderMan::f_gaussian_skip1->RunFilter(tex, tex, tmp);
				//tex->FillMargin(0, 1);
				LEVEL_FINISH();
			}
		}
		

		//remaining levels: level tex+1 is filtered from level tex, tmp is scratch
		for( ; j <=  param._level_max ; j++, tex++, filter++)
		{
			// filtering
			(*filter)->RunFilter(tex+1, tex, tmp);
			//(tex+1)->FillMargin(0, 1);
			LEVEL_FINISH();


		}
		//tex->FillMargin(1, 0);
		OCTAVE_FINISH();

	}
	if(GlobalUtil::_timingS)	glFinish();
	UnloadProgram();	
}

void PyramidPacked::ComputeGradient()
{
	
	//first pass, compute dog, gradient, orientation
	GLenum buffers[4] = { 
		GL_COLOR_ATTACHMENT0_EXT,		GL_COLOR_ATTACHMENT1_EXT ,
		GL_COLOR_ATTACHMENT2_EXT,		GL_COLOR_ATTACHMENT3_EXT
	};

	int i, j;
	double ts, t1;
	FrameBufferObject fbo;

	if(GlobalUtil::_timingS && GlobalUtil::_verbose)ts = CLOCK();

	for(i = _octave_min; i < _octave_min + _octave_num; i++)
	{
		GLTexImage * gus = GetBaseLevel(i) +  GlobalUtil::_GradientLevelOffset;
		GLTexImage * dog = GetBaseLevel(i, DATA_DOG) +  GlobalUtil::_GradientLevelOffset;
		GLTexImage * grd = GetBaseLevel(i, DATA_GRAD) +  GlobalUtil::_GradientLevelOffset;
		GLTexImage * rot = GetBaseLevel(i, DATA_ROT) +  GlobalUtil::_GradientLevelOffset;
		glDrawBuffers(3, buffers);
		gus->FitTexViewPort();
		//compute the gradient
		for(j = 0; j <  param._dog_level_num ; j++, gus++, dog++, grd++, rot++)
		{
			//gradient, dog, orientation
			glActiveTexture(GL_TEXTURE0);
			gus->BindTex();
			glActiveTexture(GL_TEXTURE1);
			(gus-1)->BindTex();
			//output
			dog->AttachToFBO(0);
			grd->AttachToFBO(1);
			rot->AttachToFBO(2);
			ShaderMan::UseShaderGradientPass((gus-1)->GetTexID());
			//compuate
			dog->DrawQuadMT4();
		}
	}
	if(GlobalUtil::_timingS)
	{
		glFinish();
		if(GlobalUtil::_verbose)
		{
			t1 = CLOCK();
			std::cout	<<"<Gradient, DOG  >\t"<<(t1-ts)<<"\n";
		}
	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -