⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 programcg.cpp

📁 SiftGPU is an implementation of SIFT [1] for GPU. SiftGPU processes pixels in parallel to build Gaussi
💻 CPP
📖 第 1 页 / 共 5 页
字号:
							out<<"else {opos = cpos.yy; index -= sum.y;}}}\n";
			out<<"pos = (pos + pos + opos);\n";*/

#else
			out<<"cc = texRECT(tex"<<i<<", pos);\n";
			out<<"if (index < cc.r) opos = cpos.xx;\n";
			out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
			out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
			out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
			out<<"pos = (pos + pos + opos);\n";
#endif
		}
	}
	out<<"FragColor = float4(pos, index, 1);\n";
	out<<"}\n"<<'\0';
	return new ProgramCG(buffer);
}

// Sets the two-component parameter _param_genlist_init_bbox to
// (w - 1, h - 1).
//   w, h : integer extents; each is converted to float and reduced by one.
void ShaderBagCG::SetGenListInitParam(int w, int h)
{
	const float last_x = static_cast<float>(w) - 1.0f;
	const float last_y = static_cast<float>(h) - 1.0f;
	const float bbox[2] = {last_x, last_y};

	cgGLSetParameter2fv(_param_genlist_init_bbox, bbox);
}

// Sets _param_ftex_width to `width` and, when the program exposes the
// _param_genlist_start_tex0 parameter, binds texture `tex0` to it and
// enables that texture parameter.
void ShaderBagCG::SetGenListStartParam(float width, int tex0)
{
	cgGLSetParameter1f(_param_ftex_width, width);

	//nothing more to do when the texture parameter handle is null
	if(!_param_genlist_start_tex0) return;

	cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
	cgGLEnableTextureParameter(_param_genlist_start_tex0);
}

// Builds the Cg fragment program that computes descriptor histograms and
// stores it in s_descriptor_fp.
//
// The program text is assembled into a fixed 10240-byte stack buffer through
// an ostrstream, terminated with an explicit '\0', and handed to ProgramCG.
// Afterwards the handles of the "gradTex", "size" and "dsize" parameters are
// cached in _param_descriptor_gtex / _param_descriptor_size /
// _param_descriptor_dsize.
//
// What the generated shader does, per fragment:
//   - derives a feature index and a sub-block index (idx, 0..15) from
//     TexCoord0 and dsize,
//   - reads the feature's position (.xy), angle (.z) and sigma (.w) from tex,
//   - writes zeros and returns when the position is within one pixel of the
//     image border,
//   - accumulates weighted gradient votes from gradTex (magnitude in .b,
//     angle in .a) into 8 orientation bins, output as two float4 values:
//     FragData0 (bins 0-3) and FragData1 (bins 4-7).
void ShaderBagCG::LoadDescriptorShaderF2()
{
	//one shader output 128/8 = 16 , each fragout encodes 4
	//const double twopi = 2.0*3.14159265358979323846;
	//const double rpi  = 8.0/twopi;
	char buffer[10240];
	ostrstream out(buffer, 10240);

	out<<setprecision(8);

	//macros, entry point signature, feature lookup and border rejection.
	//RPI is 8/(2*pi) (see the commented-out rpi above): it scales an angle
	//difference in radians to units of one of the 8 orientation bins.
	out<<"\n"
	"#define M_PI 3.14159265358979323846\n"
	"#define TWO_PI (2.0*M_PI)\n"
	"#define RPI 1.2732395447351626861510701069801\n"
	"#define WF size.z\n"
	"void main(uniform samplerRECT tex,		\n"
	"uniform	samplerRECT gradTex,			\n"
	"uniform float4		dsize,				\n"
	"uniform float3		size,				\n"
	"in		float2	TexCoord0 : TEXCOORD0,	\n"
	"out		float4  FragData0:COLOR0,		\n"
	"out		float4	FragData1:COLOR1)		\n"
	"{\n"
	"	float2 dim	= size.xy;	//image size			\n"
	"	float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
	"	float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5));		\n"
	"	index = floor(index*0.125) + 0.49;  \n"
	"	float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
	"	float2 pos = texRECT(tex, coord).xy;		\n"
	"	if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
	"	//discard;	\n"
	"	{ FragData0 = FragData1 = float4(0.0); return; }\n"
	"	float  anglef = texRECT(tex, coord).z;\n"
	"	if(anglef > M_PI) anglef -= TWO_PI;\n"
	"	float sigma = texRECT(tex, coord).w; \n"
	"	float spt  = abs(sigma * WF);	//default to be 3*sigma	\n";

	//rotation: sine/cosine of the feature angle, scaled by spt (cscs) and
	//by 1/spt (rots)
	out<<
	"	float4 cscs, rots;								\n"
	"	sincos(anglef, cscs.y, cscs.x);					\n"
	"	cscs.zw = - cscs.xy;							\n"
	"	rots = cscs /spt;								\n"
	"	cscs *= spt; \n";

	//here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
	//and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
	//divide the 4x4 sift grid into 16 1x1 blocks, each corresponding to a shader thread
	//To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
	out<<
	"	float4 temp; float2 pt, offsetpt;				\n"
	"	/*the fraction part of idx is .5*/			\n"
	"	offsetpt.x = 4.0 * frac(idx*0.25) - 2.0;				\n"
	"	offsetpt.y = floor(idx*0.25) - 1.5;			\n"
	"	temp = cscs.xwyx*offsetpt.xyxy;				\n"
	"	pt = pos + temp.xz + temp.yw;				\n";
	
	//get an axis-aligned bounding box of the rotated rectangle, clamped to
	//[1, dim - 2]
	out<<
	"	float2 bwin = abs(cscs.xy);					\n"
	"	float bsz = bwin.x + bwin.y;					\n"
	"	float4 sz;	float2 spos;					\n"
	"	sz.xy = max(pt - bsz, float2(1,1));\n"
	"	sz.zw = min(pt + bsz, dim - 2);		\n"
	"	sz = floor(sz)+0.5;"; //move sample point to pixel center

	//accumulate the votes: DA receives orientation bins 0-3, DB bins 4-7.
	//Each sample splits its weight between bin floor(theta) and the next
	//bin (wrapping from 7 back to 0).
	out<<"\n"
	"	float4 DA, DB;			\n"
	"	DA = DB  = float4(0, 0, 0, 0);		\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			float2 diff = spos - pt;								\n"
	"			temp = rots.xywx * diff.xyxy;							\n"
	"			float2 nxy = (temp.xz + temp.yw);						\n"
	"			float2 nxyn = abs(nxy);									\n"
	"			if(all(nxyn < float2(1.0)))\n"
	"			{\n"
	"				float4 cc = texRECT(gradTex, spos);						\n"
	"				float mod = cc.b;	float angle = cc.a;					\n"
	"				float theta0 = (anglef - angle)*RPI;				\n"
	"				float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"
	"				diff = nxy + offsetpt.xy;								\n"
	"				float ww = exp(-0.125*dot(diff, diff));\n"
	"				float2 weights = 1 - nxyn;\n"
	"				float weight = weights.x * weights.y *mod*ww; \n"
	"				float theta1 = floor(theta); \n"
	"				float weight2 = (theta - theta1) * weight; \n"
	"				float weight1 = weight - weight2;\n"
	"				DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"
	"				DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"
	"				DB += float4(theta1 == float4(4, 5, 6, 7))*weight1;	\n"
	"				DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"
	"			}\n"
	"		}\n"
	"	}\n";

	//write both outputs and NUL-terminate the buffer so it can be used as a
	//C string
	out<<
	"	FragData0 = DA; FragData1 = DB;\n"
	"}\n"<<'\0';

	//create the program and cache the handles of its uniform parameters
	ProgramCG * program; 
	s_descriptor_fp = program =  new ProgramCG(buffer);
	_param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");
	_param_descriptor_size = cgGetNamedParameter(*program, "size");
	_param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");


}

//Loads the shader that computes the descriptors.
//Sets GlobalUtil::_DescriptorPPT to 16 and then builds the program through
//LoadDescriptorShaderF2().
void ShaderBagCG::LoadDescriptorShader()
{
	GlobalUtil::_DescriptorPPT = 16;
	LoadDescriptorShaderF2();
}

// Builds the Cg fragment program that computes feature orientations and
// stores it in s_orientation.
//
// The program text is assembled into a fixed 10240-byte stack buffer:
//   - GAUSSIAN_WF, SAMPLE_WF and ORIENTATION_THRESHOLD are emitted as macros
//     from the corresponding GlobalUtil settings,
//   - an extra COLOR1 output (OrientationData) is declared when more than one
//     orientation is kept and orientations are not packed two-per-float,
//   - a texS sampler is declared and read when sub-pixel localization or
//     extremum-sign keeping is enabled,
//   - a 36-direction histogram is accumulated into 9 float4 bins, using
//     dynamic array indexing on the gp4fp profile and an unrolled binary
//     search otherwise,
//   - the remainder of the shader is appended by
//     WriteOrientationCodeToStream(); this function does not terminate the
//     buffer itself (presumably that call does - verify there).
//
// Finally the handles of the "gradTex", "size" and "texS" parameters are
// cached in _param_orientation_gtex / _size / _stex.
void ShaderBagCG::LoadOrientationShader()
{

	char buffer[10240];
	ostrstream out(buffer,10240);


	out<<"\n"
	"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
	"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
	"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
	"void main(uniform samplerRECT tex,			\n"
	"uniform samplerRECT gradTex,		\n"
	"		uniform float4 size,				\n"
	"		in float2 TexCoord0 : TEXCOORD0,	\n"
	"		out float4 FeatureData : COLOR0	";

	//multi orientation output
	//use one additional texture to store up to four orientations
	//when we use one 32bit float to store two orientations, no extra texture is required

	if(GlobalUtil::_MaxOrientation >1  && GlobalUtil::_OrientationPack2 == 0)
		out<<", out float4 OrientationData : COLOR1";

	if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
	{
		//data for sub-pixel localization
		out<<", uniform samplerRECT texS";
	}

	//use 9 float4 to store histogram of 36 directions
	out<<")		\n"
	"{													\n"
	"	float4 bins[10];								\n"
	"	for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0);	\n"
	"	const float4 loc = texRECT(tex, TexCoord0);			\n"
	"	const bool orientation_mode = (size.z != 0);			\n"
	"	float2 pos = loc.xy;							\n"
	"	float sigma = orientation_mode? abs(size.z) : loc.w; \n";
	if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
	{
		out<<
	"	if(orientation_mode) {\n"
	"		float4 keyx = texRECT(texS, pos);\n"
	"		sigma = sigma * pow(size.w, keyx.w); \n"
	"		pos.xy = pos.xy + keyx.yz; \n"
	"		#if " << GlobalUtil::_KeepExtremumSign << "\n"
	"			if(keyx.x<0.6) sigma = - sigma;\n"
	"		#endif\n"
	"	}\n";
	}

	out<<
	"	//bool fixed_orientation = (size.z < 0);		\n"
	"	if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
	"	const float gsigma = sigma * GAUSSIAN_WF;				\n"
	"	const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF);	\n"
	"	const float2 dim = size.xy;							\n"
	"	const float dist_threshold = win.x*win.x+0.5;			\n"
	"	const float factor = -0.5/(gsigma*gsigma);			\n"
	"	float4 sz;	float2 spos;						\n"
	"	//if(any(pos.xy <= 1)) discard;					\n"
	"	sz.xy = max( pos - win, float2(1,1));			\n"
	"	sz.zw = min( pos + win, dim-2);				\n"
	"	sz = floor(sz)+0.5;";
	//loop to get the histogram

	//NOTE: the fractional part is taken with frac(), the Cg standard library
	//name (fract() is the GLSL spelling and is not a Cg function); the
	//descriptor shader in this file uses frac() as well.
	out<<"\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			const float2 offset = spos - pos;						\n"
	"			const float sq_dist = dot(offset,offset);				\n"
	"			if( sq_dist < dist_threshold){							\n"
	"				const float4 cc = texRECT(gradTex, spos);			\n"
	"				const float grad = cc.b;	float theta = cc.a;		\n"
	"				float idx = floor(degrees(theta)*0.1);		\n"
	"				const float weight = grad*exp(sq_dist * factor);				\n"
	"				if(idx < 0 ) idx += 36;									\n"
	"				const float vidx = 4.0 * frac(idx * 0.25);//fmod(idx, 4);								\n"
	"				const float4 inc = weight*float4(vidx == float4(0,1,2,3));	";

	//Dynamic indexing is only used on the gp4fp profile. The profile is
	//compared by name because the enumerant CG_PROFILE_GPU_FP is not defined
	//in cg1.5. cgGetProfileString returns NULL for an invalid profile, so the
	//result is checked before it is passed to strcmp.
	bool use_dynamic_indexing = false;
	if(GlobalUtil::_UseDynamicIndexing)
	{
		const char * profile_name = cgGetProfileString(ProgramCG::_FProfile);
		use_dynamic_indexing = profile_name && strcmp(profile_name, "gp4fp")==0;
	}

	if(use_dynamic_indexing)
	{
		//gp_fp supports dynamic indexing
		out<<"\n"
	"				int iidx = int(floor(idx*0.25));	\n"
	"				bins[iidx]+=inc;					\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}else
	{
		//nvfp40 still does not support dynamic array indexing
		//unrolled binary search...
		out<<"\n"
	"				if(idx < 16)							\n"
	"				{										\n"
	"					if(idx < 8)							\n"
	"					{									\n"
	"						if(idx < 4)	{	bins[0]+=inc;}	\n"
	"						else		{	bins[1]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 12){	bins[2]+=inc;}	\n"
	"						else		{	bins[3]+=inc;}	\n"
	"					}									\n"
	"				}else if(idx < 32)						\n"
	"				{										\n"
	"					if(idx < 24)						\n"
	"					{									\n"
	"						if(idx <20)	{	bins[4]+=inc;}	\n"
	"						else		{	bins[5]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 28){	bins[6]+=inc;}	\n"
	"						else		{	bins[7]+=inc;}	\n"
	"					}									\n"
	"				}else 						\n"
	"				{										\n"
	"					bins[8]+=inc;						\n"
	"				}										\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}

	//append histogram smoothing / peak selection and the end of the shader
	WriteOrientationCodeToStream(out);

	//create the program and cache the handles of its uniform parameters
	ProgramCG * program;
	s_orientation = program = new ProgramCG(buffer);
	_param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");
	_param_orientation_size = cgGetNamedParameter(*program, "size");
	_param_orientation_stex = cgGetNamedParameter(*program, "texS");
}

void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)
{
	//smooth histogram and find the largest
/*
	smoothing kernel:	 (1 3 6 7 6 3 1 )/27
	the same as 3 pass of (1 1 1)/3 averaging
	maybe better to use 4 pass on the vectors...
*/


	//the inner loop on different array numbers is always unrolled in fp40

	//bug fixed here:)
	out<<"\n"
	"	float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"
	"	float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"
	"	for (int j=0; j<2; j++)								\n"
	"	{												\n"
	"		float4 prev  = bins[8];						\n"
	"		bins[9]		 = bins[0];						\n"
	"		for (int i=0; i<9; i++)							\n"
	"		{												\n"
	"			float4 newb	=	mul ( bins[i], mat2);		\n"
	"			newb.xyz	+=	mul ( prev.yzw, mat1);		\n"
	"			prev = bins[i];								\n"
	"			newb.wzy	+=	mul	( bins[i+1].zyx, mat1);	\n"
	"			bins[i] = newb;							\n"
	"		}												\n"
	"	}";


	//find the maximum voting
	out<<"\n"
	"	float4 maxh; float2 maxh2; float4 maxh4 = bins[0];				\n"
	"	for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]);				\n"
	"	maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";

	char *testpeak_code;
	char *savepeak_code;



	//save two/three/four orientations with the largest votings?

	//
	if(GlobalUtil::_MaxOrientation>1)
	{
		out<<"\n"
	"	float4 Orientations = float4(0, 0, 0, 0);				\n"
	"	float4 weights = float4(0,0,0,0);		";	
		
		testpeak_code = "\n"
	"		{test = bins[i]>hh;";

		//save the orientations in weight-decreasing order
		if(GlobalUtil::_MaxOrientation ==2)
		{
		savepeak_code = "\n"
	"		if(weight <=weights.g){}\n"
	"		else if(weight >weights.r)\n"
	"		{weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"
	"		else {weights.g = weight; Orientations.g = th;}";

		}else if(GlobalUtil::_MaxOrientation ==3)
		{
		savepeak_code = "\n"
	"		if(weight <=weights.b){}\n"
	"		else if(weight >weights.r)\n"
	"		{weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"
	"		else if(weight >weights.g)\n"
	"		{weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"
	"		else {weights.b = weight; Orientations.b = th;}";
		}else
		{
		savepeak_code = "\n"
	"		if(weight <=weights.a){}\n"
	"		else if(weight >weights.r)\n"
	"		{weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"
	"		else if(weight >weights.g)\n"
	"		{weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"
	"		else if(weight >weights.b)\n"
	"		{weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"
	"		else {weights.a = weight; Orientations.a = th;}";
		}

	}else

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -