⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 programglsl.cpp

📁 SiftGPU is an implementation of SIFT [1] for GPU. SiftGPU processes pixels parallely to build Gaussi
💻 CPP
📖 第 1 页 / 共 5 页
字号:
		}

	}else
	{
		out<<"\n"
		"	float Orientation;				";
		testpeak_code ="\\\n"
		"	if(npeaks<=0.0){\\\n"
		"	test = equal(bins[i], maxh)	;";
		savepeak_code="\\\n"
		"			npeaks++;	\\\n"
		"			Orientation = th;";

	}
	//find the peaks
	out <<"\n"
	"	#define FINDPEAK(i, k)"	<<testpeak_code<<"\\\n"
	"	if( any ( test) )							\\\n"
	"	{											\\\n"
	"		if(test.r && bins[i].x > prevb && bins[i].x > bins[i].y )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].y-prevb) / (bins[i].y+prevb-bins[i].x - bins[i].x) ; \\\n"
	"		    float	th = (k+di+0.5);	float weight = bins[i].x;"
				<<savepeak_code<<"\\\n"
	"		}\\\n"
	"		else if(test.g && all( greaterThan(bins[i].yy , bins[i].xz)) )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].z-bins[i].x) / (bins[i].z+bins[i].x-bins[i].y- bins[i].y) ; \\\n"
	"		    float	th = (k+di+1.5);	float weight = bins[i].y;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"		if(test.b && all( greaterThan( bins[i].zz , bins[i].yw)) )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i].w-bins[i].y) / (bins[i].w+bins[i].y-bins[i].z- bins[i].z) ; \\\n"
	"		    float	th = (k+di+2.5);	float weight = bins[i].z;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"		else if(test.a && bins[i].w > bins[i].z && bins[i].w > bins[i+1].x )	\\\n"
	"		{											\\\n"
	"		    float	di = -0.5 * (bins[i+1].x-bins[i].z) / (bins[i+1].x+bins[i].z-bins[i].w - bins[i].w) ; \\\n"
	"		    float	th = (k+di+3.5);	float weight = bins[i].w;				"
				<<savepeak_code<<"	\\\n"
	"		}\\\n"
	"	}}\\\n"
	"	prevb = bins[i].w;";
	//the following loop will be unrolled anyway in fp40,
	//taking more than 1000 instructions..
	//....
	if(GlobalUtil::_IsNvidia)
	{
	out<<"\n"
	"	vec4 hh = maxh * ORIENTATION_THRESHOLD;	bvec4 test;	\n"
	"	bins[9] = bins[0];								\n"
	"	float npeaks = 0.0, k = 0;						\n"
	"	float prevb	= bins[8].w;						\n"
	"	for (int i = 0; i <9 ; i++)						\n"
	"	{\n"
	"		FINDPEAK(i, k);\n"
	"		k = k + 4.0;	\n"
	"	}";
	}else
	{
		//loop unroll for ATI.
	out <<"\n"
	"	vec4 hh = maxh * ORIENTATION_THRESHOLD; bvec4 test;\n"
	"	bins[9] = bins[0];								\n"
	"	float npeaks = 0.0;								\n"
	"	float prevb	= bins[8].w;						\n"
	"	FINDPEAK(0, 0.0);\n"
	"	FINDPEAK(1, 4.0);\n"
	"	FINDPEAK(2, 8.0);\n"
	"	FINDPEAK(3, 12.0);\n"
	"	FINDPEAK(4, 16.0);\n"
	"	FINDPEAK(5, 20.0);\n"
	"	FINDPEAK(6, 24.0);\n"
	"	FINDPEAK(7, 28.0);\n"
	"	FINDPEAK(8, 32.0);\n";
	}
	//WRITE output
	if(GlobalUtil::_MaxOrientation>1)
	{
	out<<"\n"
	"	if(orientation_mode){\n"
	"		npeaks = dot(vec4(1,1,"
			<<(GlobalUtil::_MaxOrientation>2 ? 1 : 0)<<","
			<<(GlobalUtil::_MaxOrientation >3? 1 : 0)<<"), vec4(greaterThan(weights, hh)));\n"
	"		gl_FragData[1] = radians((Orientations )*10.0);\n"
	"		gl_FragData[0] = vec4(pos, npeaks, sigma);\n"
	"	}else{\n"
	"		gl_FragData[0] = vec4(pos, radians((Orientations.x)*10.0), sigma);\n"
	"	}\n";
	}else
	{
	out<<"\n"
	"	 gl_FragData[0] = vec4(pos, radians((Orientation.x)*10.0), sigma);\n";
	}
	//end
	out<<"\n"
	"}\n"<<'\0';


}

// Uploads two uniforms for the currently bound program:
//   - the integer uniform at _param_orientation_gtex is set to 1
//     (presumably the texture unit of the orientation texture -- confirm
//     against the caller that binds the textures);
//   - the float uniform at _param_orientation_size is set to sigma.
// The oTex and sigma_step arguments are accepted but not read by this body.
void ShaderBagGLSL::SetSimpleOrientationInput(int oTex, float sigma, float sigma_step)
{
	const int gtex_value = 1;

	glUniform1i(_param_orientation_gtex, gtex_value);
	glUniform1f(_param_orientation_size, sigma);
}




// Uploads the uniforms consumed by the feature-orientation program.
//   width, height, sigma, step : packed, in that order, into the 4-component
//                                uniform at _param_orientation_size.
//   stex                       : when non-zero AND sub-pixel localization or
//                                extremum-sign keeping is enabled, the uniform
//                                at _param_orientation_stex is set to 2.
// The uniform at _param_orientation_gtex is always set to 1. The gtex argument
// itself is not read by this body.
void ShaderBagGLSL::SetFeatureOrientationParam(int gtex, int width, int height, float sigma, int stex, float step)
{
	glUniform1i(_param_orientation_gtex, 1);

	// The extra texture is only needed for sub-pixel / sub-scale localization
	// (or when the extremum sign is kept), and only if one was supplied.
	const bool extra_data_enabled =
		GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign;
	if(extra_data_enabled && stex)
	{
		glUniform1i(_param_orientation_stex, 2);
	}

	// (width, height, sigma, step) travel together as a single vec4.
	const float packed_size[4] =
	{
		static_cast<float>(width),
		static_cast<float>(height),
		sigma,
		step
	};
	glUniform4fv(_param_orientation_size, 1, packed_size);
}


// Builds the GLSL fragment program that computes feature descriptors and, if
// it is usable, installs it as s_descriptor_fp.
//
// The shader source is assembled into a fixed-size stack buffer through an
// ostrstream, terminated with an explicit '\0', and handed to ProgramGLSL.
// When program->IsNative() is true the program is stored in s_descriptor_fp
// and the locations of the "gradTex", "size" and "dsize" uniforms are cached
// (the location of "tex" is not queried here). Otherwise the program is
// deleted and s_descriptor_fp is left untouched.
//
// The emitted GLSL carries no #version directive, so every arithmetic operand
// is written as a float/vector of floats: GLSL 1.10 performs no implicit
// int -> float conversion and strict compilers reject "vec2 - 2" / "1 - vec2".
void ShaderBagGLSL::LoadDescriptorShaderF2()
{
	//one shader output 128/8 = 16 , each fragout encodes 4
	//const double twopi = 2.0*3.14159265358979323846;
	//const double rpi  = 8.0/twopi;
	const int buffer_size = 10240;
	char buffer[buffer_size];
	ostrstream out(buffer, buffer_size);

	// Only string literals are streamed below, so the precision setting has
	// no visible effect on the generated source.
	out<<setprecision(8);

	// Preamble: constants, uniforms, and decoding of which feature / which
	// part of the descriptor this fragment is responsible for.
	out<<"\n"
	"#define M_PI 3.14159265358979323846\n"
	"#define TWO_PI (2.0*M_PI)\n"
	"#define RPI 1.2732395447351626861510701069801\n"
	"#define WF  size.z\n"
	"uniform sampler2DRect tex;				\n"
	"uniform sampler2DRect gradTex;			\n"
	"uniform vec4 dsize;						\n"
	"uniform vec3 size;						\n"
	"void main()		\n"
	"{\n"
	"	vec2 dim	= size.xy;	//image size			\n"
	"	float index = dsize.x*floor(gl_TexCoord[0].y * 0.5) + gl_TexCoord[0].x;\n"
	"	float idx = 8.0 * fract(index * 0.125) + 8.0 * floor(2.0 * fract(gl_TexCoord[0].y * 0.5));		\n"
	"	index = floor(index*0.125) + 0.49;  \n"
	"	vec2 coord = floor( vec2( mod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
	"	vec2 pos = texture2DRect(tex, coord).xy;		\n"
	"	if(any(lessThanEqual(pos.xy,  vec2(1.0))) || any(greaterThanEqual(pos.xy, dim-1.0)))// discard;	\n"
	"	{ gl_FragData[0] = gl_FragData[1] = vec4(0.0); return; }\n"
	"	float  anglef = texture2DRect(tex, coord).z;\n"
	"	if(anglef > M_PI) anglef -= TWO_PI;\n"
	"	float sigma = texture2DRect(tex, coord).w; \n"
	"	float spt  = abs(sigma * WF);	//default to be 3*sigma	\n";

	//rotation
	out<<
	"	vec4 cscs, rots;								\n"
	"	cscs.y = sin(anglef);	cscs.x = cos(anglef);	\n"
	"	cscs.zw = - cscs.xy;							\n"
	"	rots = cscs /spt;								\n"
	"	cscs *= spt; \n";

	//here cscs is actually (cos, sin, -cos, -sin) * (factor: 3)*sigma
	//and rots is  (cos, sin, -cos, -sin ) /(factor*sigma)
	//divide the 4x4 sift grid into 16 1x1 blocks, each corresponding to a shader thread
	//To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side

	out<<
	"vec4 temp; vec2 pt, offsetpt;				\n"
	"	/*the fraction part of idx is .5*/			\n"
	"	offsetpt.x = 4.0* fract(idx*0.25) - 2.0;				\n"
	"	offsetpt.y = floor(idx*0.25) - 1.5;			\n"
	"	temp = cscs.xwyx*offsetpt.xyxy;				\n"
	"	pt = pos + temp.xz + temp.yw;				\n";
	
	//get a horizontal bounding box of the rotated rectangle
	//(FIX: "dim - 2" mixed a vec2 with an int literal; use vec2(2.0))
	out<<
	"	vec2 bwin = abs(cscs.xy);					\n"
	"	float bsz = bwin.x + bwin.y;					\n"
	"	vec4 sz;					\n"
	"	sz.xy = max(pt - vec2(bsz), vec2(1,1));\n"
	"	sz.zw = min(pt + vec2(bsz), dim - vec2(2.0));		\n"
	"	sz = floor(sz)+0.5;"; //move sample point to pixel center
	//get voting for two box

	//accumulate the 8 orientation bins (DA: bins 0-3, DB: bins 4-7) over
	//every sample of the bounding box that falls inside the rotated cell
	//(FIX: "1 - nxyn" mixed an int literal with a vec2; use vec2(1.0))
	out<<"\n"
	"	vec4 DA, DB; vec2 spos;			\n"
	"	DA = DB  = vec4(0, 0, 0, 0);		\n"
	"	for(spos.y = sz.y; spos.y <= sz.w;	spos.y+=1.0)				\n"
	"	{																\n"
	"		for(spos.x = sz.x; spos.x <= sz.z;	spos.x+=1.0)			\n"
	"		{															\n"
	"			vec2 diff = spos - pt;								\n"
	"			temp = rots.xywx * diff.xyxy;\n"
	"			vec2 nxy = (temp.xz + temp.yw); \n"
	"			vec2 nxyn = abs(nxy);			\n"
	"			if(all( lessThan(nxyn, vec2(1.0)) ))\n"
	"			{\n"
	"				vec4 cc = texture2DRect(gradTex, spos);						\n"
	"				float mod = cc.b;	float angle = cc.a;					\n"
	"				float theta0 = RPI * (anglef - angle);				\n"
	"				float theta = theta0 < 0.0? theta0 + 8.0 : theta0;;\n"
	"				diff = nxy + offsetpt.xy;								\n"
	"				float ww = exp(-0.125*dot(diff, diff));\n"
	"				vec2 weights = vec2(1.0) - nxyn;\n"
	"				float weight = weights.x * weights.y *mod*ww; \n"
	"				float theta1 = floor(theta); \n"
	"				float weight2 = (theta - theta1) * weight;\n"
	"				float weight1 = weight - weight2;\n"
	"				DA += vec4(equal(vec4(theta1),  vec4(0, 1, 2, 3)))*weight1;\n"
	"				DA += vec4(equal(vec4(theta1),  vec4(7, 0, 1, 2)))*weight2; \n"
	"				DB += vec4(equal(vec4(theta1),  vec4(4, 5, 6, 7)))*weight1;\n"
	"				DB += vec4(equal(vec4(theta1),  vec4(3, 4, 5, 6)))*weight2; \n"
	"			}\n"
	"		}\n"
	"	}\n";

	// Write both halves of the histogram and terminate the C string.
	out<<
	"	 gl_FragData[0] = DA; gl_FragData[1] = DB;\n"
	"}\n"<<'\0';

	ProgramGLSL * program =  new ProgramGLSL(buffer); 

	if(program->IsNative())
	{
		// Keep the program and cache the uniform locations used by
		// SetFeatureDescirptorParam().
		s_descriptor_fp = program ;
		_param_descriptor_gtex = glGetUniformLocation(*program, "gradTex");
		_param_descriptor_size = glGetUniformLocation(*program, "size");
		_param_descriptor_dsize = glGetUniformLocation(*program, "dsize");
	}else
	{
		// Unusable on this hardware: release it, s_descriptor_fp is unchanged.
		delete program;
	}


}

// Sets GlobalUtil::_DescriptorPPT to 16 and then builds the descriptor
// program by delegating to LoadDescriptorShaderF2().
void ShaderBagGLSL::LoadDescriptorShader()
{
	const int descriptor_ppt = 16;

	GlobalUtil::_DescriptorPPT = descriptor_ppt;
	LoadDescriptorShaderF2();
}


// Uploads the uniforms consumed by the descriptor program.
//   dwidth, fwidth : sent, each followed by its reciprocal, as the
//                    4-component uniform at _param_descriptor_dsize:
//                    (dwidth, 1/dwidth, fwidth, 1/fwidth).
//   width, height  : sent together with GlobalUtil::_DescriptorWindowFactor
//                    as the 3-component uniform at _param_descriptor_size.
// The uniform at _param_descriptor_gtex is always set to 1. The gtex, otex
// and sigma arguments are not read by this body.
void ShaderBagGLSL::SetFeatureDescirptorParam(int gtex, int otex, float dwidth, float fwidth,  float width, float height, float sigma)
{
	glUniform1i(_param_descriptor_gtex, 1);

	// Widths and their reciprocals.
	float dsize_values[4];
	dsize_values[0] = dwidth;
	dsize_values[1] = 1.0f/dwidth;
	dsize_values[2] = fwidth;
	dsize_values[3] = 1.0f/fwidth;
	glUniform4fv(_param_descriptor_dsize, 1, dsize_values);

	// Image dimensions plus the descriptor window factor.
	float size_values[3] = { width, height, 0.0f };
	size_values[2] = GlobalUtil::_DescriptorWindowFactor;
	glUniform3fv(_param_descriptor_size, 1, size_values);
}

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////

// Creates the fixed set of GLSL programs used by the packed (PKSL) shader bag
// and caches the uniform locations they need:
//   s_gray        - RGB to intensity using weights (0.299, 0.587, 0.114)
//   s_sampling    - packs the red channel sampled at gl_TexCoord[0..3] into rgba
//   s_margin_copy - copy clamped/selected by the "truncate" uniform
//   s_zero_pass   - writes vec4(0.0)
//   s_grad_pass   - writes cc - cp, gradient magnitude and atan(dy, dx) to
//                   gl_FragData[0], [1] and [2]
// It then loads the orientation shader (with _OrientationPack2 forced to 0),
// falling back to a simplified program when s_orientation is still NULL, and
// finally loads the descriptor shader when GlobalUtil::_DescriptorPPT is set.
// Each fallback clears GlobalUtil::_FullSupported and reports on std::cerr.
//
// The margin-copy shader compares floats with float literals and uses the
// component-wise equal() built-in (as the other shaders in this file do):
// in standard GLSL "==" yields a single bool and requires matching operand
// types, so "vec4(0,1,2,3) == truncate.w" is not a per-component test.
void ShaderBagPKSL::LoadFixedShaders()
{
	ProgramGLSL * program;


	s_gray = new ProgramGLSL( 
	"uniform sampler2DRect tex; void main(){\n"
	"float intensity = dot(vec3(0.299, 0.587, 0.114), texture2DRect(tex,gl_TexCoord[0].xy ).rgb);\n"
	"gl_FragColor= vec4(intensity, intensity, intensity, 1.0);}"	);


	s_sampling = new ProgramGLSL(
	"uniform sampler2DRect tex; void main(){\n"
	"gl_FragColor= vec4(	texture2DRect(tex,gl_TexCoord[0].st ).r,texture2DRect(tex,gl_TexCoord[1].st ).r,\n"
	"						texture2DRect(tex,gl_TexCoord[2].st ).r,texture2DRect(tex,gl_TexCoord[3].st ).r);}"	);


	// FIX: "truncate.z ==0" -> "== 0.0" and the vec4/float "==" replaced by
	// equal(vec4, vec4) so "weights" is a per-component 0/1 selector.
	s_margin_copy = program = new ProgramGLSL(
	"uniform sampler2DRect tex;  uniform vec4 truncate; void main(){\n"
	"vec4 cc = texture2DRect(tex, min(gl_TexCoord[0].xy, truncate.xy)); \n"
	"bvec2 ob = lessThan(gl_TexCoord[0].xy, truncate.xy);\n"
	"if(ob.y) { gl_FragColor = (truncate.z ==0.0 ? cc.rrbb : cc.ggaa); } \n"
	"else if(ob.x) {gl_FragColor = (truncate.w <1.5 ? cc.rgrg : cc.baba);} \n"
	"else {	vec4 weights = vec4(equal(vec4(0.0, 1.0, 2.0, 3.0), vec4(truncate.w)));\n"
	"float v = dot(weights, cc); gl_FragColor = vec4(v);}}");

	_param_margin_copy_truncate = glGetUniformLocation(*program, "truncate");



	s_zero_pass = new ProgramGLSL("void main(){gl_FragColor = vec4(0.0);}");



	// "invalid" is 1.0 where the gradient magnitude is exactly zero, so the
	// second argument of atan() is non-zero for those components.
	s_grad_pass = program = new ProgramGLSL(
	"uniform sampler2DRect tex; uniform sampler2DRect texp; void main ()\n"
	"{\n"
	"	vec4 v1, v2, gg;\n"
	"	vec4 cc = texture2DRect(tex, gl_TexCoord[0].xy);\n"
	"	vec4 cp = texture2DRect(texp, gl_TexCoord[0].xy);\n"
	"	gl_FragData[0] = cc - cp; \n"
	"	vec4 cl = texture2DRect(tex, gl_TexCoord[1].xy); vec4 cr = texture2DRect(tex, gl_TexCoord[2].xy);\n"
	"	vec4 cd = texture2DRect(tex, gl_TexCoord[3].xy); vec4 cu = texture2DRect(tex, gl_TexCoord[4].xy);\n"
	"	vec4 dx = (vec4(cr.rb, cc.ga) - vec4(cc.rb, cl.ga)).zxwy;\n"
	"	vec4 dy = (vec4(cu.rg, cc.ba) - vec4(cc.rg, cd.ba)).zwxy;\n"
	"	vec4 grad = 0.5 * sqrt(dx*dx + dy * dy);\n"
	"	gl_FragData[1] = grad;\n"
	"	vec4 invalid = vec4(equal(grad, vec4(0.0)));	\n"
	"	vec4 ov = atan(dy, dx + invalid);		\n"
	"	gl_FragData[2] = ov; \n"
	"}\n\0");

	_param_grad_pass_texp = glGetUniformLocation(*program, "texp");


	GlobalUtil::_OrientationPack2 = 0;
	LoadOrientationShader();

	if(s_orientation == NULL)
	{
		//Load a simplified version if the right version is not supported
		s_orientation = program =  new ProgramGLSL(
		"uniform sampler2DRect fTex; uniform sampler2DRect oTex; uniform vec2 size; void main(){\n"
		"	vec4 cc = texture2DRect(fTex, gl_TexCoord[0].xy);\n"
		"	vec2 co = cc.xy * 0.5; \n"
		"	vec4 oo = texture2DRect(oTex, co);\n"
		"	bvec2 bo = lessThan(fract(co), vec2(0.5)); \n"
		"	float o = bo.y? (bo.x? oo.r : oo.g) : (bo.x? oo.b : oo.a); \n"
		"	gl_FragColor = vec4(cc.rg, o, size.x * pow(size.y, cc.a));}");  
		_param_orientation_gtex= glGetUniformLocation(*program, "oTex");
		_param_orientation_size= glGetUniformLocation(*program, "size");

		// Record that only the reduced orientation path is available.
		GlobalUtil::_MaxOrientation = 0;
		GlobalUtil::_FullSupported = 0;
		std::cerr<<"Orientation simplified on this hardware"<<endl;
	}

	if(GlobalUtil::_DescriptorPPT)
	{
		LoadDescriptorShader();
		if(s_descriptor_fp == NULL) 
		{
			// No usable descriptor program: disable descriptor computation.
			GlobalUtil::_DescriptorPPT = GlobalUtil::_FullSupported = 0; 
			std::cerr<<"Descriptor ignored on this hardware"<<endl;
		}
	}
}


void ShaderBagPKSL::LoadDisplayShaders()
{
	ProgramGLSL * program;

	s_copy_key = new ProgramGLSL(
	"uniform sampler2DRect tex;void main(){\n"
	"gl_FragColor= vec4(texture2DRect(tex, gl_TexCoord[0].xy).rg, 0,1);}");

	//shader used to write a vertex buffer object
	//which is used to draw the quads of each feature
	s_vertex_list = program = new ProgramGLSL(
	"uniform sampler2DRect tex; uniform vec4 sizes; void main(){\n"
	"float fwidth = sizes.y; \n"
	"float twidth = sizes.z; \n"
	"float rwidth = sizes.w; \n"
	"float index = 0.1*(fwidth*floor(gl_TexCoord[0].y) + gl_TexCoord[0].x);\n"
	"float px = mod(index, twidth);\n"
	"vec2 tpos= floor(vec2(px, index*rwidth))+0.5;\n"
	"vec4 cc = texture2DRect(tex, tpos );\n"
	"float size = 3.0f * cc.a; \n"
	"gl_FragColor.zw = vec2(0.0, 1.0);\n"
	"if(any(lessThan(cc.xy,vec2(0.0)))) {gl_FragColor.xy = cc.xy;}else \n"
	"{\n"
	"	float type = fract(px);\n"
	"	vec2 dxy; float s, c;\n"
	"	dxy.x = type < 0.1 ? 0.0 : ((type <0.5 || type > 0.9)? size : -size);\n"
	"	dxy.y = type < 0.2 ? 0.0 : ((type < 0.3 || type > 0.7 )? -size :size); \n"
	"	s = sin(cc.b); c = cos(cc.b); \n"
	"	gl_FragColor.x = cc.x + c*dxy.x-s*dxy.y;\n"
	"	gl_FragColor.y = cc.y + c*dxy.y+s*dxy.x;}\n"
	"}\n\0");
	/*gl_FragColor = vec4(tpos, 0.0, 1.0);}\n\0");*/

	_param_genvbo_size = glGetUniformLocation(*program, "sizes");

	s_display_gaussian = new ProgramGLSL(
	"uniform sampler2DRect tex; void main(){\n"
    "vec4 pc = texture2DRect(tex, gl_TexCoord[0].xy);	bvec2 ff = lessThan(fract(gl_TexCoord[0].xy), vec2(0.5));\n"
    "float v = ff.y?(ff.x? pc.r : pc.g):(ff.x?pc.b:pc.a); gl_FragColor = vec4(vec3(v), 1.0);}");

	s_display_dog =  new ProgramGLSL(

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -