⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ludecomp.cpp

📁 使用GPU的强大并行计算能力来实现稠密矩阵的LU分解计算。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
			}
		}

		PingPong01();

		Divide(k,rXmin, rYmin + deltaY, rXmax, _m);	

		// Perform sweep of all the remaining rows
		// ie. row'(i) = row(i) - row(i)[k]*row(k)[j]

		PingPong01();

		CopyRect(k+.5,k+.5,_m,k+1.5);

		row_fp.Bind();

		glBegin(GL_QUADS);
		glMultiTexCoord2f(GL_TEXTURE0_ARB,k+.5,k+1);
		glMultiTexCoord2f(GL_TEXTURE1_ARB,k+1,k+.5);
		glVertex2f(k+1,k+1);

		glMultiTexCoord2f(GL_TEXTURE0_ARB,k+.5,k+1);
		glMultiTexCoord2f(GL_TEXTURE1_ARB,_m,k+.5);
		glVertex2f(_m,k+1);

		glMultiTexCoord2f(GL_TEXTURE0_ARB,k+.5,_n);
		glMultiTexCoord2f(GL_TEXTURE1_ARB,_m,k+.5);
		glVertex2f(_m,_n);

		glMultiTexCoord2f(GL_TEXTURE0_ARB,k+.5,_n);
		glMultiTexCoord2f(GL_TEXTURE1_ARB,k+1,k+.5);
		glVertex2f(k+1,_n);

		glEnd();

		// Increase row coordinates
		rYmin = (k+1) * deltaY;
		rYmax = rYmin + deltaY;
		rXmin = (k+1) * deltaX;
		rXmax = rXmax + deltaX;
		
		++_currentDrawSurface;
		_CheckForGLError("LUDecomp::Compute()");


	}
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

	_bComputed = true;
}



//----------------------------------------------------------------------------
// Function     	: LUDecomp::LoadMatrix
// Description	    : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::LoadMatrix(const std::vector<std::vector<float> >& m)
 * @brief Sets the non zero elements of the sparse matrix
 */ 
void LUDecomp::LoadMatrix(float *data)
{
 
	GLint format = (_ncomponents == 1) ? GL_RED : GL_RGBA;
	_currentDrawSurface = 0;

	current01 = 1;


	glActiveTextureARB(GL_TEXTURE0_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV,textureid[0]);   
	glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, _m, _n, format, GL_FLOAT,data);	
	
	//Go into FBO mode, bind the uploaded data as the source texture
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
	glDrawBuffer( GL_COLOR_ATTACHMENT0_EXT);  
	glColorMask(~0,~0,~0,~0);
	glEnable(GL_TEXTURE_RECTANGLE_NV);	
	glActiveTextureARB( GL_TEXTURE0_ARB );


	PingPong01();
	
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);


	++_currentDrawSurface;

}

//----------------------------------------------------------------------------
// Function     	: LUDecomp::GetMatrix
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::GetMatrix(std::vector<std::vector<float> >& m) const;
 * @brief Gets the matrix data;
 */ 
void LUDecomp::GetMatrix(std::vector<std::vector<float> >& m) const
{ 
  GLint format = (_ncomponents == 1) ? GL_RED : GL_RGBA;
  //assert(_bComputed);

  for (int i = (int)m.size() - 1; i >= 0; --i)
    m[i].clear();
  m.clear();

  std::vector<float> data(_m * _ncomponents);

  int xoffset = 0;
  int yoffset = 0;

	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,fb);
	glReadBuffer((_currentDrawSurface%2) ? GL_COLOR_ATTACHMENT0_EXT  : GL_COLOR_ATTACHMENT1_EXT );
	for (int i = 0; i < _n; ++i)
	{
		glReadPixels(xoffset, yoffset, _m, 1, format, GL_FLOAT, &data[0]);


		m.push_back(data);
		++yoffset;
	}
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,0);
	

}


//----------------------------------------------------------------------------
// Function     	: LUDecomp::ComputeMax
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::ComputeMax
 * @brief Computes the max value using a mipmap type algorithm
 */ 
void LUDecomp::ComputeMax(int k,float*maxi,float*maxj) 
{
	
	static GLuint qid[4];

	static bool once=true;
	GLuint count;
	
	const int cache=0;

	int i;


	glActiveTextureARB(GL_TEXTURE0_ARB);

	PingPong23();
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current01]);

	CopyRect(0,0,_m,_m);

	PingPong23();
	CopyRect(0,0,_m,_m);

	max_fp.Bind();

	if(once)
		glGenOcclusionQueriesNV(4, qid);

	int bits = _m-k;

	//put the max for each row in the column
	for(int j=31;j > 0;j--)
	{
		if(!(bits & (1 << j)))
			continue;

		int nextbits = bits & (~(1 << j));
		int cur = (1 << (j-1));

		int end=1;
		if(j == 1)
			end = 2;

		
		for(int l=0;l < end;l++)
		for (i=cur;i > 0;i = i >> 1)
		{

			PingPong23();
			

			glBegin(GL_QUADS);
			{
				int a= nextbits;
				glTexCoord2f(i+a+k,k);
				glVertex2f(a+k,k);

				glTexCoord2f(i+i+a+k,k);
				glVertex2f(i+a+k,k);

				glTexCoord2f(i+i+a+k,_m+k);
				glVertex2f(i+a+k,_m+k);

				glTexCoord2f(i+a+k,_m+k);
				glVertex2f(a+k,_m+k);
			}
			glEnd();
			
		}
		
		bits = nextbits;
		bits++;
		j++;
	}

	bits = _m-k;
	

	//find the max over the whole column
	for(int j=31;j > 0;j--)
	{
		if(!(bits & (1 << j)))
			continue;

		int nextbits = bits & (~(1 << j));
		int cur = (1 << (j-1));
		for (i=cur;i > 0;i = i >> 1)
		{
		
			PingPong23();

			glBegin(GL_QUADS);
			{
				
				int a= nextbits;
				glTexCoord2f(k,i+a+k);
				glVertex2f(k,a+k);
				glTexCoord2f(k,i+a+k);
				glVertex2f(1+k,a+k);
				glTexCoord2f(k,i+i+a+k);
				glVertex2f(1+k,i+a+k);
				glTexCoord2f(k,i+i+a+k);
				glVertex2f(k,i+a+k);
			}
			glEnd();
		}
		
		bits = nextbits;
		bits++;
		j++;

	}

	quadtree_fp.Bind();
	quadtree_fp.SetConstant(0,k,k,k,k);


	glActiveTextureARB(GL_TEXTURE0_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current23+2]);

	glActiveTextureARB(GL_TEXTURE1_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current01]);

	glActiveTextureARB(GL_TEXTURE0_ARB);



	
	PingPong23();
	glActiveTextureARB(GL_TEXTURE0_ARB);

	int xstart = k,ystart=k;

	bits = _m-k;

	for(int i=(bits+1) >> 1;i > 0;i = (i+1) >> 1)
	{
		glBeginOcclusionQueryNV(qid[0]);
		glBegin(GL_QUADS);

			//quad1
			glTexCoord2f(0,i);
			glVertex2f(xstart,ystart);

			glTexCoord2f(0,i);
			glVertex2f(xstart+i,ystart);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i,ystart+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart,ystart+i);

		glEnd();
		glEndOcclusionQueryNV();


		glBeginOcclusionQueryNV(qid[1]);
		glBegin(GL_QUADS);
			//quad2
			glTexCoord2f(0,i);
			glVertex2f(xstart+i,ystart);

			glTexCoord2f(0,i);
			glVertex2f(xstart+i*2,ystart);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i*2,ystart+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i,ystart+i);
		glEnd();
		glEndOcclusionQueryNV();


		glBeginOcclusionQueryNV(qid[2]);
		glBegin(GL_QUADS);
			//quad3
			glTexCoord2f(0,i);
			glVertex2f(xstart,ystart+i);

			glTexCoord2f(0,i);
			glVertex2f(xstart+i,ystart+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i,ystart+i+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart,ystart+i+i);
		glEnd();
		glEndOcclusionQueryNV();

		glBeginOcclusionQueryNV(qid[3]);
		glBegin(GL_QUADS);
			//quad4
			glTexCoord2f(0,i);
			glVertex2f(xstart+i,ystart+i);

			glTexCoord2f(0,i);
			glVertex2f(xstart+i+i,ystart+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i+i,ystart+i+i);

			glTexCoord2f(0,i+i);
			glVertex2f(xstart+i,ystart+i+i);
		glEnd();
		glEndOcclusionQueryNV();

		for(int j=0;j < 4;j++)
		{
			glGetOcclusionQueryuivNV(qid[j], GL_PIXEL_COUNT_NV, &count);
			
			if(count >= 1)
			{
				xstart+= (j%2)*i;
				ystart+= (j/2)*i;
				break;
			}
		}

		if(i == 1)
			break;
	}

	*maxj = xstart;
	*maxi = ystart;

	glViewport(0, 0, _m, _n);
	once=false;
}



//----------------------------------------------------------------------------
// Function     	: LUDecomp::ComputeMax
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::ComputeMax
 * @brief Computes the max value using a mipmap type algorithm
 */ 
void LUDecomp::ComputeMax(int k,float*maxi) 
{
	static GLuint qid[4];

	static bool once=true;
	GLuint count;
	

	PingPong23();

	const int cache=0;

	int i;
	

	glActiveTextureARB(GL_TEXTURE0_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current01]);
	

	CopyRect(k,k,_m,k+1);

	PingPong23();

	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current01]);

	CopyRect(k,k,_m,k+1);

	max_fp.Bind();

	if(once)
		glGenOcclusionQueriesNV(4, qid);

	int bits = _m-k;

	for(int j=31;j > 0;j--)
	{
		if(!(bits & (1 << j)))
			continue;

		if(bits & (bits-1))
			bits = 1 << (j+1);
		
		break;
	}
	for (i=bits;i > 0;i = i >> 1)
	{

		PingPong23();
		
		_currentDrawSurface++;

		glBegin(GL_QUADS);
		{
			glTexCoord2f(k+i,k);
			glVertex2f(k,k);

			glTexCoord2f(k+i,k);
			glVertex2f(k,k+1);
			
			glTexCoord2f(k+i*2,k);
			glVertex2f(k+i,k+1);
			
			glTexCoord2f(k+i*2,k);
			glVertex2f(k+i,k);
		}
		glEnd();
	}

	
	glActiveTextureARB(GL_TEXTURE0_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current23+2]);

	glActiveTextureARB(GL_TEXTURE1_ARB);
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[!current01]);

	glActiveTextureARB(GL_TEXTURE0_ARB);

	quadtree_fp.Bind();
	quadtree_fp.SetConstant(0,k,k,k,k);

	PingPong23();

	float xstart = k+.5,ystart=k+.5;

	bits = _m-k;
	
	for(int i=(bits+1) >> 1;i > 0;i = (i+1) >> 1)
	{
		glBeginOcclusionQueryNV(qid[0]);
		glBegin(GL_QUADS);

			//quad1

			glTexCoord2f(i,0);
			glVertex2f(xstart,ystart);

			glTexCoord2f(i,0);
			glVertex2f(xstart,ystart+1);

			glTexCoord2f(i+i,0);
			glVertex2f(xstart+i,ystart+1);

			glTexCoord2f(i+i,0);
			glVertex2f(xstart+i,ystart);

		glEnd();
		glEndOcclusionQueryNV();

		glBeginOcclusionQueryNV(qid[1]);
		glBegin(GL_QUADS);
			//quad2
			glTexCoord2f(i,0);
			glVertex2f(xstart+i,ystart);

			glTexCoord2f(i,0);
			glVertex2f(xstart+i,ystart+1);

			glTexCoord2f(i+i,0);
			glVertex2f(xstart+i+i,ystart+1);

			glTexCoord2f(i+i,0);
			glVertex2f(xstart+i+i,ystart);
		glEnd();
		glEndOcclusionQueryNV();

		for(int j=0;j < 2;j++)
		{
			glGetOcclusionQueryuivNV(qid[j], GL_PIXEL_COUNT_NV, &count);
			
			if(count >= 1)
			{
				xstart+= j*i;
				break;
			}
		}

		if(i == 1)
			break;
	}
	
	*maxi = xstart-.5;

	glViewport(0, 0, _m, _n);
	
	once=false;


}


//----------------------------------------------------------------------------
// Function     	: LUDecomp::GetMatrix
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::GetMatrix
 * @brief Gets the matrix 
 */ 
void LUDecomp::GetMatrix(float* m) 
{ 
	GLint format = (_ncomponents == 1) ? GL_RED : GL_RGBA;
	//assert(_bComputed);


	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,fb);
	glReadBuffer((_currentDrawSurface%2) ? GL_COLOR_ATTACHMENT0_EXT  : GL_COLOR_ATTACHMENT1_EXT );
	glReadPixels(0,0,_m,_n,format,GL_FLOAT,(GLvoid*)m);
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT,0);
}


//----------------------------------------------------------------------------
// Function     	: LUDecomp::GetPivot
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::GetPivot
 * @brief Gets the pivot matrices
 */ 
void LUDecomp::GetPivot(int* rpivot,int *cpivot) const
{
	for (int i = 0; i < _n; ++i)
		rpivot[i] = _rowpivot[i];
	for (int i = 0; i < _m; ++i)
		cpivot[i] = _colpivot[i];
}

//----------------------------------------------------------------------------
// Function     	: LUDecomp::GetPivot
// Description	  : 
//----------------------------------------------------------------------------
/**
 * @fn LUDecomp::GetPivot
 * @brief Gets the pivot matrix 
 */ 
void LUDecomp::GetPivot(int* rpivot) const
{
	for (int i = 0; i < _n; ++i)
		rpivot[i] = _rowpivot[i];
}


//----------------------------------------------------------------------------
// Function     	: static CheckForGLError
// Description	  : local check for GL error
//----------------------------------------------------------------------------
void LUDecomp::_CheckForGLError( char *msg )
{
#if defined(DEBUG) | defined(_DEBUG)
	GLenum errCode;
	const GLubyte *errStr;
	if ((errCode = glGetError()) != GL_NO_ERROR) 
	{
		errStr = gluErrorString(errCode);
		fprintf(stderr,"OpenGL ERROR: %s: %s\n", errStr, msg);
	}
#endif
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -