⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 programglsl.cpp

📁 SiftGPU is an implementation of SIFT [1] for GPU. SiftGPU processes pixels in parallel to build Gaussi
💻 CPP
📖 第 1 页 / 共 5 页
字号:
	"}\n" <<'\0';



	ProgramGLSL * program = new ProgramGLSL(buffer); 
	if(program->IsNative())
	{
		s_keypoint = program ;
		//parameter
	}else
	{
		delete program;
		out.seekp(pos);
		out << 
	"	gl_FragData[1] =  vec4(dog, 0, 0, 0) ;	\n"
	"}\n" <<'\0';
		s_keypoint = program = new ProgramGLSL(buffer);
		GlobalUtil::_SubpixelLocalization = 0;
		std::cerr<<"Detection simplified on this hardware"<<endl;
	}

	_param_dog_texu = glGetUniformLocation(*program, "texU");
	_param_dog_texd = glGetUniformLocation(*program, "texD");
}


// Uploads the integer values for the "texU" and "texD" uniforms of the
// currently bound program (their locations are cached in _param_dog_texu
// and _param_dog_texd).
//
// texU, texD: not used by this implementation; the constants 1 and 2 are
// always uploaded instead.
// NOTE(review): presumably 1 and 2 are the texture image units the caller
// binds the two textures to -- confirm against the call site.
void ShaderBagGLSL::SetDogTexParam(int texU, int texD)
{
	(void)texU;
	(void)texD;

	const int value_for_texu = 1;
	const int value_for_texd = 2;

	glUniform1i(_param_dog_texu, value_for_texu);
	glUniform1i(_param_dog_texd, value_for_texd);
}

// Points the "tex0" sampler uniform of the list-generation step program
// (location cached in _param_genlist_step_tex0) at texture image unit 1.
//
// tex, tex0: not used by this implementation; the unit index is fixed.
void ShaderBagGLSL::SetGenListStepParam(int tex, int tex0)
{
	(void)tex;
	(void)tex0;

	const int tex0_unit = 1;
	glUniform1i(_param_genlist_step_tex0, tex0_unit);
}
// Uploads one vec4 to the uniform whose location is cached in
// _param_genvbo_size. The four components are, in order:
//   size * 3, fwidth, width, 1 / width.
//
// width:  written as the third component; its reciprocal is the fourth.
// fwidth: written unchanged as the second component.
// size:   scaled by 3 and written as the first component.
void ShaderBagGLSL::SetGenVBOParam( float width, float fwidth,  float size)
{
	float packed[4];
	packed[0] = size * 3.0f;
	packed[1] = fwidth;
	packed[2] = width;
	packed[3] = 1.0f / width;

	glUniform4fv(_param_genvbo_size, 1, packed);
}



// Deselects the current GLSL program by making program object 0 current.
void ShaderBagGLSL::UnloadProgram()
{
	const unsigned int no_program = 0;
	glUseProgram(no_program);
}



// Creates the fragment programs used for feature-list generation and caches
// the uniform locations the Set* helpers upload to later:
//   s_genlist_init_tight, s_genlist_init_ex, s_genlist_histo,
//   s_genlist_start and s_genlist_step.
//
// ndoglev, nlev: not referenced anywhere in this function body.
void ShaderBagGLSL::LoadGenListShader(int ndoglev, int nlev)
{
	// scratch pointer, reused for each program that needs uniform lookups
	ProgramGLSL * program;

	// Initial pass: sample the red channel of "tex" at texture coordinates
	// 0..3 and output, per channel, 1.0 where the sample is > 0 and 0.0
	// otherwise.
	s_genlist_init_tight = new ProgramGLSL(
	"uniform sampler2DRect tex; void main (void){\n"
	"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
	"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
	"gl_FragColor = vec4(greaterThan(helper, vec4(0.0,0.0,0.0,0.0)));\n"
	"}");

	
	// Same test as above, but a channel is only set when both components of
	// its texture coordinate are also less than the "bbox" uniform.
	s_genlist_init_ex = program = new ProgramGLSL(
	"uniform sampler2DRect tex;uniform vec2 bbox;\n"
	"void main (void ){\n"
	"vec4 helper = vec4( texture2DRect(tex, gl_TexCoord[0].xy).r,  texture2DRect(tex, gl_TexCoord[1].xy).r,\n"
	"texture2DRect(tex, gl_TexCoord[2].xy).r, texture2DRect(tex, gl_TexCoord[3].xy).r);\n"
	"bvec4 helper2 = bvec4( \n"
	"all(lessThan(gl_TexCoord[0].xy , bbox)) && helper.x >0,\n"
	"all(lessThan(gl_TexCoord[1].xy , bbox)) && helper.y >0,\n"
	"all(lessThan(gl_TexCoord[2].xy , bbox)) && helper.z >0,\n"
	"all(lessThan(gl_TexCoord[3].xy , bbox)) && helper.w >0);\n"
	"gl_FragColor = vec4(helper2);\n"
	"}");
	// location of "bbox"; uploaded by SetGenListInitParam
	_param_genlist_init_bbox = glGetUniformLocation( *program, "bbox");


	// Reduction pass: each output channel (r, g, b, a) receives the sum of
	// all four components of the texel fetched at texture coordinate
	// 0, 1, 2 and 3 respectively.
	s_genlist_histo = new ProgramGLSL(
	"uniform sampler2DRect tex; void main (void){\n"
	"vec4 helper; vec4 helper2; \n"
	"helper = texture2DRect(tex, gl_TexCoord[0].xy); helper2.xy = helper.xy + helper.zw; \n"
	"helper = texture2DRect(tex, gl_TexCoord[1].xy); helper2.zw = helper.xy + helper.zw; \n"
	"gl_FragColor.rg = helper2.xz + helper2.yw;\n"
	"helper = texture2DRect(tex, gl_TexCoord[2].xy); helper2.xy = helper.xy + helper.zw; \n"
	"helper = texture2DRect(tex, gl_TexCoord[3].xy); helper2.zw = helper.xy + helper.zw; \n"
	"gl_FragColor.ba= helper2.xz+helper2.yw;\n"
	"}");


	// First traversal pass (start = 1, one level): the generated shader
	// derives the list index from the fragment position and the "width"
	// uniform instead of reading it from a texture.
	s_genlist_start= program =  LoadGenListStepShader(1, 1);
	_param_ftex_width= glGetUniformLocation(*program, "width");
	_param_genlist_start_tex0 = glGetUniformLocation(*program, "tex0");
	// Subsequent traversal passes (start = 0, one level): position and index
	// are read back from the "tex" sampler.
	s_genlist_step = program = LoadGenListStepShader(0, 1);
	_param_genlist_step_tex= glGetUniformLocation(*program, "tex");
	_param_genlist_step_tex0= glGetUniformLocation(*program, "tex0");

}

// Uploads the vec2 (xmax - 0.5, ymax - 0.5) to the uniform whose location is
// cached in _param_margin_copy_truncate.
//
// xmax, ymax: integer limits; each is lowered by one half before upload.
void ShaderBagGLSL::SetMarginCopyParam(int xmax, int ymax)
{
	const float half = 0.5f;

	float limit[2];
	limit[0] = xmax - half;
	limit[1] = ymax - half;

	glUniform2fv(_param_margin_copy_truncate, 1, limit);
}

// Uploads the vec2 (w - 1, h - 1) to the "bbox" uniform of the
// s_genlist_init_ex program (location cached in _param_genlist_init_bbox).
// That shader only accepts samples whose texture coordinates are strictly
// less than bbox in both components.
//
// w, h: integer extents; each is reduced by one before upload.
void ShaderBagGLSL::SetGenListInitParam(int w, int h)
{
	float upper_bound[2];
	upper_bound[0] = w - 1.0f;
	upper_bound[1] = h - 1.0f;

	glUniform2fv(_param_genlist_init_bbox, 1, upper_bound);
}
// Uploads the "width" uniform of the list-generation start program
// (location cached in _param_ftex_width). The start shader uses it as the
// row stride when turning a fragment position into a linear index.
//
// width: value written to the uniform.
// tex0:  not used by this implementation.
void ShaderBagGLSL::SetGenListStartParam(float width, int tex0)
{
	(void)tex0;

	const float row_width = width;
	glUniform1f(_param_ftex_width, row_width);
}


// Generates the source of a list-traversal fragment shader into a local
// buffer and returns a newly allocated ProgramGLSL built from it. The caller
// owns the returned pointer.
//
// start: non-zero -> the shader computes its index from the fragment
//                    position and a "width" uniform and starts at
//                    pos = (0.5, 0.5);
//        zero     -> the shader reads pos (rg) and index (b) from the "tex"
//                    sampler at texture coordinate 0.
// step:  number of levels descended in one pass; one sampler "tex<i>" is
//        declared and one descent stage emitted per level.
//
// Each descent stage fetches cc from tex<i> at pos, compares index against
// the running sums of cc.r, cc.g and cc.b to choose one of four child
// offsets, subtracts the count that was skipped, and sets pos = 2*pos + offset.
ProgramGLSL* ShaderBagGLSL::LoadGenListStepShader(int start, int step)
{
	const int buffer_size = 10240;
	char buffer[buffer_size];
	ostrstream out(buffer, buffer_size);

	// sampler declarations, one per level
	for(int level = 0; level < step; ++level)
	{
		out<<"uniform sampler2DRect tex"<<level<<";\n";
	}

	// shader prologue: where pos and index come from
	if(start)
	{
		out<<
		"uniform float width;\n"
		"void main(void){\n"
		"float  index = floor(gl_TexCoord[0].y) * width + floor(gl_TexCoord[0].x);\n"
		"vec2 pos = vec2(0.5, 0.5);\n";
	}
	else
	{
		out<<
		"uniform sampler2DRect tex;\n"
		"void main(void){\n"
		"vec4 tc = texture2DRect( tex, gl_TexCoord[0].xy);\n"
		"vec2 pos = tc.rg; float index = tc.b;\n";
	}
	out<<"vec2 sum; 	vec4 cc;\n";

	// helper variables are only declared when at least one stage follows
	if(step > 0)
	{
		out<<"vec2 cpos = vec2(-0.5, 0.5);\t vec2 opos;\n";
	}

	// one descent stage per level
	for(int level = 0; level < step; ++level)
	{
		out<<"cc = texture2DRect(tex"<<level<<", pos);\n"
		"sum.x = cc.r + cc.g; sum.y = sum.x + cc.b;  \n"
		"if (index <cc.r){ opos = cpos.xx;}\n"
		"else if(index < sum.x ) {opos = cpos.yx; index -= cc.r;}\n"
		"else if(index < sum.y ) {opos = cpos.xy; index -= sum.x;}\n"
		"else {opos = cpos.yy; index -= sum.y;}\n"
		"pos = (pos + pos + opos);\n";
	}

	// epilogue plus the terminating NUL the strstream does not add itself
	out<<
	"gl_FragColor = vec4(pos, index, 1.0);\n"
	"}\n"<<'\0';

	return new ProgramGLSL(buffer);
}


// Generates the GLSL source of the orientation-histogram fragment shader into
// a local buffer, compiles it as a ProgramGLSL, and, if the program reports
// IsNative(), stores it in s_orientation and caches the locations of its
// "gradTex", "size" and "texS" uniforms. If it is not native the program is
// deleted and s_orientation is left untouched.
//
// The generated text depends on these GlobalUtil settings:
//   _IsNvidia                    - emits NVIDIA-specific #pragma lines
//   _OrientationGaussianFactor,
//   _OrientationWindowFactor,
//   _MulitiOrientationThreshold  - baked in as #define constants
//   _SubpixelLocalization,
//   _KeepExtremumSign            - add the "texS" sampler and the code using it
//   _UseDynamicIndexing          - chooses indexed vs. branched bin update
void ShaderBagGLSL::LoadOrientationShader()
{
	// fixed-size scratch buffer that receives the shader source
	char buffer[10240];
	ostrstream out(buffer,10240);

	if(GlobalUtil::_IsNvidia)
	{
	//with the following two options, the speed of GLSL is not the same as cg
	out <<	"#pragma optionNV(ifcvt none)\n"
			"#pragma optionNV(unroll all)\n";
	}

	// Constants, uniforms and the start of main(): nine vec4 bins are zeroed
	// (36 scalar histogram slots, since idx below is degrees/10 wrapped to
	// 0..35), the feature location is read from "tex", and sigma is taken
	// from |size.z| when size.z is non-zero, otherwise from loc.w.
	out<<"\n"
	"#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
	"#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
	"#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
	"uniform sampler2DRect tex;					\n"
	"uniform sampler2DRect gradTex;				\n"
	"uniform vec4 size;						\n"
	<< (GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign? 
	"	uniform sampler2DRect texS;	\n" : " ")
	<<
	"void main()		\n"
	"{													\n"
	"	vec4 bins[10];								\n"
	"	bins[0] = vec4(0.0);bins[1] = vec4(0.0);bins[2] = vec4(0.0);	\n"
	"	bins[3] = vec4(0.0);bins[4] = vec4(0.0);bins[5] = vec4(0.0);	\n"
	"	bins[6] = vec4(0.0);bins[7] = vec4(0.0);bins[8] = vec4(0.0);	\n"
	"	vec4 loc = texture2DRect(tex, gl_TexCoord[0].xy);	\n"
	"	vec2 pos = loc.xy;		\n"
	"	bool orientation_mode = (size.z != 0);			\n"
	"	float sigma = orientation_mode? abs(size.z) : loc.w; \n";
	// Optional refinement from "texS": shifts pos by offset.yz, scales sigma
	// by size.w^offset.w, and (when _KeepExtremumSign is non-zero) negates
	// sigma if offset.x < 0.6.
	if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
	{
		out<<
	"	if(orientation_mode){\n"
	"		vec4 offset = texture2DRect(texS, pos);\n"
	"		pos.xy = pos.xy + offset.yz; \n"
	"		sigma = sigma * pow(size.w, offset.w);\n"
	"		#if "<< GlobalUtil::_KeepExtremumSign << "\n"
	"			if(offset.x < 0.6) sigma = -sigma; \n"
	"		#endif\n"
	"	}\n";
	}
	// When size.z < 0 the shader writes (pos, 0, sigma) and returns without
	// building a histogram. Otherwise it sets up the sampling window around
	// pos, clamped to [1, dim-2] and snapped to texel centres.
	out<<
	"	//bool fixed_orientation = (size.z < 0);		\n"
	"	if(size.z < 0) {gl_FragData[0] = vec4(pos, 0, sigma); return;}"
	"	float gsigma = sigma * GAUSSIAN_WF;				\n"
	"	vec2 win = abs(vec2(sigma)) * (SAMPLE_WF * GAUSSIAN_WF);	\n"
	"	vec2 dim = size.xy;							\n"
	"	float dist_threshold = win.x*win.x+0.5;			\n"
	"	float factor = -0.5/(gsigma*gsigma);			\n"
	"	vec4 sz;	vec2 spos;						\n"
	"	//if(any(pos.xy <= 1)) discard;					\n"
	"	sz.xy = max( pos - win, vec2(1,1));			\n"
	"	sz.zw = min( pos + win, dim-2);				\n"
	"	sz = floor(sz)+0.5;";
	//loop to get the histogram
	// For every sample inside the circular window: read gradient magnitude
	// (b) and angle (a) from "gradTex", weight the magnitude by a Gaussian of
	// the squared distance, and build "inc", a vec4 that carries the weight in
	// the component selected by idx mod 4.

	out<<"\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			vec2 offset = spos - pos;								\n"
	"			float sq_dist = dot(offset,offset);						\n"
	"			if( sq_dist < dist_threshold){							\n"
	"				vec4 cc = texture2DRect(gradTex, spos);				\n"
	"				float grad = cc.b;	float theta = cc.a;				\n"
	"				float idx = floor(degrees(theta)*0.1);				\n"
	"				if(idx < 0 ) idx += 36;									\n"
	"				float weight = grad*exp(sq_dist * factor);				\n"
	"				float vidx = fract(idx * 0.25) * 4.0;//mod(idx, 4.0) ;							\n"
	"				vec4 inc = weight*vec4(equal(vec4(vidx), vec4(0.0,1.0,2.0,3.0)));";

	// Add "inc" to bins[idx / 4]; both branches also close the if and the two
	// for loops opened above.
	if(GlobalUtil::_UseDynamicIndexing && GlobalUtil::_IsNvidia)
	{
		//dynamic indexing may not be faster
		out<<"\n"
	"				int iidx = int((idx*0.25));	\n"
	"				bins[iidx]+=inc;					\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}else
	{
		//nvfp40 still does not support dynamic array indexing
		//unrolled binary search...
		out<<"\n"
	"				if(idx < 16)							\n"
	"				{										\n"
	"					if(idx < 8)							\n"
	"					{									\n"
	"						if(idx < 4)	{	bins[0]+=inc;}	\n"
	"						else		{	bins[1]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 12){	bins[2]+=inc;}	\n"
	"						else		{	bins[3]+=inc;}	\n"
	"					}									\n"
	"				}else if(idx < 32)						\n"
	"				{										\n"
	"					if(idx < 24)						\n"
	"					{									\n"
	"						if(idx <20)	{	bins[4]+=inc;}	\n"
	"						else		{	bins[5]+=inc;}	\n"
	"					}else								\n"
	"					{									\n"
	"						if(idx < 28){	bins[6]+=inc;}	\n"
	"						else		{	bins[7]+=inc;}	\n"
	"					}									\n"
	"				}else 						\n"
	"				{										\n"
	"					bins[8]+=inc;						\n"
	"				}										\n"
	"			}										\n"
	"		}											\n"
	"	}";

	}

	// Appends the remainder of the shader (histogram smoothing and peak
	// selection).
	// NOTE(review): this function never writes a terminating '\0' itself, so
	// WriteOrientationCodeToStream is presumably responsible for it -- confirm.
	WriteOrientationCodeToStream(out);

	ProgramGLSL * program = new ProgramGLSL(buffer);
	if(program->IsNative())
	{
		// keep the program and cache the uniform locations used by the setters
		s_orientation = program ;
		_param_orientation_gtex = glGetUniformLocation(*program, "gradTex");
		_param_orientation_size = glGetUniformLocation(*program, "size");
		_param_orientation_stex = glGetUniformLocation(*program, "texS");
	}else
	{
		// not usable on this hardware: discard it; s_orientation is not assigned
		delete program;
	}
}


void ShaderBagGLSL::WriteOrientationCodeToStream(std::ostream& out)
{
	//smooth histogram and find the largest
/*
	smoothing kernel:	 (1 3 6 7 6 3 1 )/27
	the same as 3 pass of (1 1 1)/3 averaging
	maybe better to use 4 pass on the vectors...
*/


	//the inner loop on different array numbers is always unrolled in fp40

	//bug fixed here:)
	out<<"\n"
	"	//mat3 m1 = mat3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;  \n"
	"	mat3 m1 = mat3(1, 3, 6, 0, 1, 3,0, 0, 1)/27.0;  \n"
	"	mat4 m2 = mat4(7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;\n"
	"	#define FILTER_CODE(i) {						\\\n"
	"			vec4 newb	=	(bins[i]* m2);			\\\n"
	"			newb.xyz	+=	( prev.yzw * m1);		\\\n"
	"			prev = bins[i];							\\\n"
	"			newb.wzy	+=	( bins[i+1].zyx *m1);	\\\n"
	"			bins[i] = newb;}\n"
	"	for (int j=0; j<2; j++)								\n"
	"	{												\n"
	"		vec4 prev  = bins[8];						\n"
	"		bins[9]		 = bins[0];						\n";

	if(GlobalUtil::_IsNvidia)
	{
		out<<
	"		for (int i=0; i<9; i++)							\n"
	"		{												\n"
	"			FILTER_CODE(i);								\n"
	"		}												\n"
	"	}";

	}else
	{
		//manually unroll the loop for ATI.
		out << 
	"	   FILTER_CODE(0);\n"
	"	   FILTER_CODE(1);\n"
	"	   FILTER_CODE(2);\n"
	"	   FILTER_CODE(3);\n"
	"	   FILTER_CODE(4);\n"
	"	   FILTER_CODE(5);\n"
	"	   FILTER_CODE(6);\n"
	"	   FILTER_CODE(7);\n"
	"	   FILTER_CODE(8);\n"
	"	}\n";
	}
	//find the maximum voting
	out<<"\n"
	"	vec4 maxh; vec2 maxh2; 	\n"
	"	vec4 maxh4 = max(max(max(max(max(max(max(max(bins[0], bins[1]), bins[2]), \n"
	"			bins[3]), bins[4]), bins[5]), bins[6]), bins[7]), bins[8]);\n"
	"	maxh2 = max(maxh4.xy, maxh4.zw); maxh = vec4(max(maxh2.x, maxh2.y));";

	char *testpeak_code;
	char *savepeak_code;

	//save two/three/four orientations with the largest votings?

	if(GlobalUtil::_MaxOrientation>1)
	{
		out<<"\n"
		"	vec4 Orientations = vec4(0, 0, 0, 0);				\n"
		"	vec4 weights = vec4(0,0,0,0);		";	
		
		testpeak_code = "\\\n"
		"	{test = greaterThan(bins[i], hh);";

		//save the orientations in weight-decreasing order
		if(GlobalUtil::_MaxOrientation ==2)
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.g){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights.rg = vec2(weight, weights.r); Orientations.rg = vec2(th, Orientations.r);}\\\n"
		"			else {weights.g = weight; Orientations.g = th;}";
		}else if(GlobalUtil::_MaxOrientation ==3)
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.b){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights.rgb = vec3(weight, weights.rg); Orientations.rgb = vec3(th, Orientations.rg);}\\\n"
		"			else if(weight >weights.g)\\\n"
		"			{weights.gb = vec2(weight, weights.g); Orientations.gb = vec2(th, Orientations.g);}\\\n"
		"			else {weights.b = weight; Orientations.b = th;}";
		}else
		{
		savepeak_code = "\\\n"
		"			if(weight <=weights.a){}\\\n"
		"			else if(weight >weights.r)\\\n"
		"			{weights = vec4(weight, weights.rgb); Orientations = vec4(th, Orientations.rgb);}\\\n"
		"			else if(weight >weights.g)\\\n"
		"			{weights.gba = vec3(weight, weights.gb); Orientations.gba = vec3(th, Orientations.gb);}\\\n"
		"			else if(weight >weights.b)\\\n"
		"			{weights.ba = vec2(weight, weights.b); Orientations.ba = vec2(th, Orientations.b);}\\\n"
		"			else {weights.a = weight; Orientations.a = th;}";

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -