📄 programcg.cpp
字号:
out<<"else {opos = cpos.yy; index -= sum.y;}}}\n";
out<<"pos = (pos + pos + opos);\n";*/
#else
out<<"cc = texRECT(tex"<<i<<", pos);\n";
out<<"if (index < cc.r) opos = cpos.xx;\n";
out<<"else if (index < cc.r + cc.g){opos = cpos.yx; index -= cc.r;}\n";
out<<"else if (index < cc.r + cc.g + cc.b){opos = cpos.xy; index -= (cc.r + cc.g);}\n";
out<<"else {opos = cpos.yy; index -= (cc.r + cc.g + cc.b);}\n";
out<<"pos = (pos + pos + opos);\n";
#endif
}
}
out<<"FragColor = float4(pos, index, 1);\n";
out<<"}\n"<<'\0';
return new ProgramCG(buffer);
}
// Uploads (w - 1, h - 1) as a two-component float vector to the
// _param_genlist_init_bbox parameter.
void ShaderBagCG::SetGenListInitParam(int w, int h)
{
    float upper_bound[2];
    upper_bound[0] = static_cast<float>(w) - 1.0f;
    upper_bound[1] = static_cast<float>(h) - 1.0f;
    cgGLSetParameter2fv(_param_genlist_init_bbox, upper_bound);
}
// Sets the _param_ftex_width parameter to `width` and, when the
// _param_genlist_start_tex0 handle is non-null, binds texture `tex0` to it
// and enables that texture parameter.
void ShaderBagCG::SetGenListStartParam(float width, int tex0)
{
    cgGLSetParameter1f(_param_ftex_width, width);

    // Nothing more to do when the texture parameter handle is null.
    if(!_param_genlist_start_tex0) return;

    cgGLSetTextureParameter(_param_genlist_start_tex0, tex0);
    cgGLEnableTextureParameter(_param_genlist_start_tex0);
}
// Builds the Cg fragment program that computes feature descriptors, stores the
// resulting ProgramCG in s_descriptor_fp, and caches the handles of its
// "gradTex", "size" and "dsize" uniforms.
//
// What the generated program does for each fragment:
//  - derives a feature index and a cell index idx in [0,16) from TexCoord0 and dsize;
//  - reads the feature (x, y, orientation, sigma) from the xyzw channels of tex;
//  - writes zeros to both outputs when the feature position is within one
//    pixel of the image border (dim = size.xy);
//  - otherwise accumulates weighted gradient votes from gradTex into 8
//    orientation bins: FragData0 holds bins 0-3, FragData1 holds bins 4-7.
void ShaderBagCG::LoadDescriptorShaderF2()
{
//one feature needs 128/8 = 16 fragments: each fragment writes two float4 outputs (8 values)
//unused leftovers; the RPI macro in the shader text below is the same value, 8/(2*pi)
//const double twopi = 2.0*3.14159265358979323846;
//const double rpi = 8.0/twopi;
//fixed-size text buffer that receives the generated program source
char buffer[10240];
ostrstream out(buffer, 10240);
//only string literals and '\0' are streamed below, so the precision setting
//does not change the generated text
out<<setprecision(8);
//program header: constants, signature, feature lookup and the border test
out<<"\n"
"#define M_PI 3.14159265358979323846\n"
"#define TWO_PI (2.0*M_PI)\n"
"#define RPI 1.2732395447351626861510701069801\n"
"#define WF size.z\n"
"void main(uniform samplerRECT tex, \n"
"uniform samplerRECT gradTex, \n"
"uniform float4 dsize, \n"
"uniform float3 size, \n"
"in float2 TexCoord0 : TEXCOORD0, \n"
"out float4 FragData0:COLOR0, \n"
"out float4 FragData1:COLOR1) \n"
"{\n"
" float2 dim = size.xy; //image size \n"
" float index = dsize.x * floor(TexCoord0.y * 0.5) + TexCoord0.x;\n"
" float idx = 8.0 * frac(index * 0.125) + 8.0 * floor(2.0 * frac(TexCoord0.y * 0.5)); \n"
" index = floor(index*0.125) + 0.49; \n"
" float2 coord = floor( float2( fmod(index, dsize.z), index*dsize.w)) + 0.5 ;\n"
" float2 pos = texRECT(tex, coord).xy; \n"
" if(any(pos.xy <= 1) || any(pos.xy >=dim-1)) "
" //discard; \n"
" { FragData0 = FragData1 = float4(0.0); return; }\n"
" float anglef = texRECT(tex, coord).z;\n"
" if(anglef > M_PI) anglef -= TWO_PI;\n"
" float sigma = texRECT(tex, coord).w; \n"
" float spt = abs(sigma * WF); //default to be 3*sigma \n";
//rotation: sine/cosine of the feature orientation, scaled by spt
out<<
" float4 cscs, rots; \n"
" sincos(anglef, cscs.y, cscs.x); \n"
" cscs.zw = - cscs.xy; \n"
" rots = cscs /spt; \n"
" cscs *= spt; \n";
//here cscs is actually (cos, sin, -cos, -sin) * spt, where spt = |sigma * WF|
//and rots is (cos, sin, -cos, -sin) / spt
//divide the 4x4 sift grid into 16 1x1 blocks, each handled by one shader thread
//To use linear interpolation, 1x1 is increased to 2x2, by adding 0.5 to each side
out<<
" float4 temp; float2 pt, offsetpt; \n"
" /*the fraction part of idx is .5*/ \n"
" offsetpt.x = 4.0 * frac(idx*0.25) - 2.0; \n"
" offsetpt.y = floor(idx*0.25) - 1.5; \n"
" temp = cscs.xwyx*offsetpt.xyxy; \n"
" pt = pos + temp.xz + temp.yw; \n";
//get an axis-aligned bounding box of the rotated rectangle, clamped to [1, dim-2]
out<<
" float2 bwin = abs(cscs.xy); \n"
" float bsz = bwin.x + bwin.y; \n"
" float4 sz; float2 spos; \n"
" sz.xy = max(pt - bsz, float2(1,1));\n"
" sz.zw = min(pt + bsz, dim - 2); \n"
" sz = floor(sz)+0.5;"; //move sample point to pixel center
//accumulate the votes into the two output vectors: each sample inside the
//rotated cell splits its weight between orientation bin floor(theta) and the
//next bin (wrapping from 7 back to 0)
out<<"\n"
" float4 DA, DB; \n"
" DA = DB = float4(0, 0, 0, 0); \n"
" for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
" { \n"
" for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
" { \n"
" float2 diff = spos - pt; \n"
" temp = rots.xywx * diff.xyxy; \n"
" float2 nxy = (temp.xz + temp.yw); \n"
" float2 nxyn = abs(nxy); \n"
" if(all(nxyn < float2(1.0)))\n"
" {\n"
" float4 cc = texRECT(gradTex, spos); \n"
" float mod = cc.b; float angle = cc.a; \n"
" float theta0 = (anglef - angle)*RPI; \n"
" float theta = theta0 < 0? theta0 + 8.0 : theta0; // fmod(theta0 + 8.0, 8.0); \n"
" diff = nxy + offsetpt.xy; \n"
" float ww = exp(-0.125*dot(diff, diff));\n"
" float2 weights = 1 - nxyn;\n"
" float weight = weights.x * weights.y *mod*ww; \n"
" float theta1 = floor(theta); \n"
" float weight2 = (theta - theta1) * weight; \n"
" float weight1 = weight - weight2;\n"
" DA += float4(theta1 == float4(0, 1, 2, 3))*weight1; \n"
" DA += float4(theta1 == float4(7, 0, 1, 2))*weight2; \n"
" DB += float4(theta1 == float4(4, 5, 6, 7))*weight1; \n"
" DB += float4(theta1 == float4(3, 4, 5, 6))*weight2; \n"
" }\n"
" }\n"
" }\n";
//write the two outputs and NUL-terminate the text so buffer is a valid C string
out<<
" FragData0 = DA; FragData1 = DB;\n"
"}\n"<<'\0';
//the new program is stored in s_descriptor_fp; where it is released is not visible here
ProgramCG * program;
s_descriptor_fp = program = new ProgramCG(buffer);
//look up the uniforms by name; *program is passed as the program argument,
//presumably through a conversion defined by ProgramCG (not visible here)
_param_descriptor_gtex = cgGetNamedParameter(*program, "gradTex");
_param_descriptor_size = cgGetNamedParameter(*program, "size");
_param_descriptor_dsize = cgGetNamedParameter(*program, "dsize");
}
//Loads the shader that computes the descriptors.
//Sets GlobalUtil::_DescriptorPPT to 16 and then builds the program with
//LoadDescriptorShaderF2().
void ShaderBagCG::LoadDescriptorShader()
{
//NOTE(review): 16 matches the "128/8 = 16" figure noted in
//LoadDescriptorShaderF2 -- confirm the exact meaning of _DescriptorPPT
GlobalUtil::_DescriptorPPT = 16;
LoadDescriptorShaderF2();
}
// Builds the Cg fragment program that computes feature orientations, stores
// the resulting ProgramCG in s_orientation, and caches the handles of its
// "gradTex", "size" and "texS" uniforms.
//
// The program text is assembled in three steps:
//  1. header and signature; optional pieces depend on GlobalUtil settings:
//     - a second output OrientationData (COLOR1) when _MaxOrientation > 1 and
//       _OrientationPack2 == 0;
//     - an extra sampler texS when _SubpixelLocalization or
//       _KeepExtremumSign is set;
//  2. a loop over a window around the feature that accumulates
//     Gaussian-weighted gradient magnitudes into 36 orientation bins of
//     10 degrees each, stored as 9 float4 values (bins[0..8]);
//  3. the histogram smoothing / peak selection code appended by
//     WriteOrientationCodeToStream().
//
// NOTE(review): unlike LoadDescriptorShaderF2, this function does not append a
// '\0' itself; presumably WriteOrientationCodeToStream terminates the text
// before buffer is handed to ProgramCG -- confirm.
void ShaderBagCG::LoadOrientationShader()
{
    // Fixed-size text buffer that receives the generated program source.
    char buffer[10240];
    ostrstream out(buffer,10240);

    // Header: tunables baked in as macros, then the start of main's signature.
    out<<"\n"
    "#define GAUSSIAN_WF "<<GlobalUtil::_OrientationGaussianFactor<<" \n"
    "#define SAMPLE_WF ("<<GlobalUtil::_OrientationWindowFactor<< " )\n"
    "#define ORIENTATION_THRESHOLD "<< GlobalUtil::_MulitiOrientationThreshold << "\n"
    "void main(uniform samplerRECT tex, \n"
    "uniform samplerRECT gradTex, \n"
    " uniform float4 size, \n"
    " in float2 TexCoord0 : TEXCOORD0, \n"
    " out float4 FeatureData : COLOR0 ";

    //multi orientation output
    //use one additional texture to store up to four orientations
    //when we use one 32bit float to store two orientations, no extra texture is required
    if(GlobalUtil::_MaxOrientation >1 && GlobalUtil::_OrientationPack2 == 0)
        out<<", out float4 OrientationData : COLOR1";

    if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
    {
        //data for sub-pixel localization
        out<<", uniform samplerRECT texS";
    }

    //use 9 float4 to store histogram of 36 directions
    //(bins[9] is left uninitialised here; it is written by the smoothing code
    // emitted from WriteOrientationCodeToStream)
    out<<") \n"
    "{ \n"
    " float4 bins[10]; \n"
    " for (int i=0; i<9; i++) bins[i] = float4(0,0,0,0); \n"
    " const float4 loc = texRECT(tex, TexCoord0); \n"
    " const bool orientation_mode = (size.z != 0); \n"
    " float2 pos = loc.xy; \n"
    " float sigma = orientation_mode? abs(size.z) : loc.w; \n";

    // Optional refinement from texS: rescale sigma, offset the position and,
    // when _KeepExtremumSign is non-zero, encode the sign in sigma.
    if(GlobalUtil::_SubpixelLocalization || GlobalUtil::_KeepExtremumSign)
    {
        out<<
        " if(orientation_mode) {\n"
        " float4 keyx = texRECT(texS, pos);\n"
        " sigma = sigma * pow(size.w, keyx.w); \n"
        " pos.xy = pos.xy + keyx.yz; \n"
        " #if " << GlobalUtil::_KeepExtremumSign << "\n"
        " if(keyx.x<0.6) sigma = - sigma;\n"
        " #endif\n"
        " }\n";
    }

    // Early out for size.z < 0 (orientation fixed to 0), then set up the
    // sampling window, clamped to [1, dim-2] and snapped to pixel centers.
    out<<
    " //bool fixed_orientation = (size.z < 0); \n"
    " if(size.z < 0) {FeatureData = float4(pos, 0, sigma); return;}"
    " const float gsigma = sigma * GAUSSIAN_WF; \n"
    " const float2 win = abs(sigma.xx) * (SAMPLE_WF * GAUSSIAN_WF); \n"
    " const float2 dim = size.xy; \n"
    " const float dist_threshold = win.x*win.x+0.5; \n"
    " const float factor = -0.5/(gsigma*gsigma); \n"
    " float4 sz; float2 spos; \n"
    " //if(any(pos.xy <= 1)) discard; \n"
    " sz.xy = max( pos - win, float2(1,1)); \n"
    " sz.zw = min( pos + win, dim-2); \n"
    " sz = floor(sz)+0.5;";

    //loop to get the histogram
    //The fractional part is taken with frac(), the Cg standard-library name
    //also used by the descriptor shader in this file; the previous text used
    //the GLSL spelling fract().
    out<<"\n"
    " for(spos.y = sz.y; spos.y <= sz.w; spos.y+=1.0) \n"
    " { \n"
    " for(spos.x = sz.x; spos.x <= sz.z; spos.x+=1.0) \n"
    " { \n"
    " const float2 offset = spos - pos; \n"
    " const float sq_dist = dot(offset,offset); \n"
    " if( sq_dist < dist_threshold){ \n"
    " const float4 cc = texRECT(gradTex, spos); \n"
    " const float grad = cc.b; float theta = cc.a; \n"
    " float idx = floor(degrees(theta)*0.1); \n"
    " const float weight = grad*exp(sq_dist * factor); \n"
    " if(idx < 0 ) idx += 36; \n"
    " const float vidx = 4.0 * frac(idx * 0.25);//fmod(idx, 4); \n"
    " const float4 inc = weight*float4(vidx == float4(0,1,2,3)); ";

    if(GlobalUtil::_UseDynamicIndexing && strcmp(cgGetProfileString(ProgramCG::_FProfile), "gp4fp")==0)
    // if(ProgramCG::_FProfile == CG_PROFILE_GPU_FP) this enumerant is not defined in cg1.5
    {
        //gp_fp supports dynamic indexing
        out<<"\n"
        " int iidx = int(floor(idx*0.25)); \n"
        " bins[iidx]+=inc; \n"
        " } \n"
        " } \n"
        " }";
    }else
    {
        //nvfp40 still does not support dynamic array indexing
        //unrolled binary search...
        out<<"\n"
        " if(idx < 16) \n"
        " { \n"
        " if(idx < 8) \n"
        " { \n"
        " if(idx < 4) { bins[0]+=inc;} \n"
        " else { bins[1]+=inc;} \n"
        " }else \n"
        " { \n"
        " if(idx < 12){ bins[2]+=inc;} \n"
        " else { bins[3]+=inc;} \n"
        " } \n"
        " }else if(idx < 32) \n"
        " { \n"
        " if(idx < 24) \n"
        " { \n"
        " if(idx <20) { bins[4]+=inc;} \n"
        " else { bins[5]+=inc;} \n"
        " }else \n"
        " { \n"
        " if(idx < 28){ bins[6]+=inc;} \n"
        " else { bins[7]+=inc;} \n"
        " } \n"
        " }else \n"
        " { \n"
        " bins[8]+=inc; \n"
        " } \n"
        " } \n"
        " } \n"
        " }";
    }

    // Append the histogram smoothing / peak selection part of the program.
    WriteOrientationCodeToStream(out);

    // The new program is stored in s_orientation; where it is released is not
    // visible here.
    ProgramCG * program;
    s_orientation = program = new ProgramCG(buffer);

    // "texS" is only declared when sub-pixel localization or sign keeping is
    // enabled, so its lookup is performed even when the uniform is absent.
    _param_orientation_gtex = cgGetNamedParameter(*program, "gradTex");
    _param_orientation_size = cgGetNamedParameter(*program, "size");
    _param_orientation_stex = cgGetNamedParameter(*program, "texS");
}
void ShaderBagCG::WriteOrientationCodeToStream(std::ostream& out)
{
//smooth histogram and find the largest
/*
smoothing kernel: (1 3 6 7 6 3 1 )/27
the same as 3 pass of (1 1 1)/3 averaging
maybe better to use 4 pass on the vectors...
*/
//the inner loop on different array numbers is always unrolled in fp40
//bug fixed here:)
out<<"\n"
" float3x3 mat1 = float3x3(1, 0, 0, 3, 1, 0, 6, 3, 1)/27.0;; //bug fix.. \n"
" float4x4 mat2 = float4x4( 7, 6, 3, 1, 6, 7, 6, 3, 3, 6, 7, 6, 1, 3, 6, 7)/27.0;;\n"
" for (int j=0; j<2; j++) \n"
" { \n"
" float4 prev = bins[8]; \n"
" bins[9] = bins[0]; \n"
" for (int i=0; i<9; i++) \n"
" { \n"
" float4 newb = mul ( bins[i], mat2); \n"
" newb.xyz += mul ( prev.yzw, mat1); \n"
" prev = bins[i]; \n"
" newb.wzy += mul ( bins[i+1].zyx, mat1); \n"
" bins[i] = newb; \n"
" } \n"
" }";
//find the maximum voting
out<<"\n"
" float4 maxh; float2 maxh2; float4 maxh4 = bins[0]; \n"
" for (int i=1; i<9; i++) maxh4 = max(maxh4, bins[i]); \n"
" maxh2 = max(maxh4.xy, maxh4.zw); maxh = float4(max(maxh2.x, maxh2.y));";
char *testpeak_code;
char *savepeak_code;
//save two/three/four orientations with the largest votings?
//
if(GlobalUtil::_MaxOrientation>1)
{
out<<"\n"
" float4 Orientations = float4(0, 0, 0, 0); \n"
" float4 weights = float4(0,0,0,0); ";
testpeak_code = "\n"
" {test = bins[i]>hh;";
//save the orientations in weight-decreasing order
if(GlobalUtil::_MaxOrientation ==2)
{
savepeak_code = "\n"
" if(weight <=weights.g){}\n"
" else if(weight >weights.r)\n"
" {weights.rg = float2(weight, weights.r); Orientations.rg = float2(th, Orientations.r);}\n"
" else {weights.g = weight; Orientations.g = th;}";
}else if(GlobalUtil::_MaxOrientation ==3)
{
savepeak_code = "\n"
" if(weight <=weights.b){}\n"
" else if(weight >weights.r)\n"
" {weights.rgb = float3(weight, weights.rg); Orientations.rgb = float3(th, Orientations.rg);}\n"
" else if(weight >weights.g)\n"
" {weights.gb = float2(weight, weights.g); Orientations.gb = float2(th, Orientations.g);}\n"
" else {weights.b = weight; Orientations.b = th;}";
}else
{
savepeak_code = "\n"
" if(weight <=weights.a){}\n"
" else if(weight >weights.r)\n"
" {weights = float4(weight, weights.rgb); Orientations = float4(th, Orientations.rgb);}\n"
" else if(weight >weights.g)\n"
" {weights.gba = float3(weight, weights.gb); Orientations.gba = float3(th, Orientations.gb);}\n"
" else if(weight >weights.b)\n"
" {weights.ba = float2(weight, weights.b); Orientations.ba = float2(th, Orientations.b);}\n"
" else {weights.a = weight; Orientations.a = th;}";
}
}else
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -