⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 densematrixmatrixoperation.cpp

📁 PDE simulator on GPU.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// DenseMatrixMatrixOperation.cpp: implementation of the DenseMatrixMatrixOperation class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "shadow.h"
#include "DenseMatrixOnGPU.h"
#include "DenseMatrixMatrixOperation.h"

#include "FragmentProgram.h"
#include "InitOpenGL.h"
#include "CommonHW.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

////////////////////////////////////////////////////////////////////////////////
/// Matrix result(m*n) = Matrix A(m*l) * Matrix B(l*n)
/// using Larson-McAllister Matrix Multiplication 2001.
///
/// A fragment program is generated at run time with one TEX/TEX/MAD group per
/// element of the inner dimension (A.GetWidth()), so the whole product is
/// evaluated in a single rendering pass. The quad is drawn into the current
/// framebuffer and the Width x Height region at the origin is then copied into
/// result's texture.
///
/// \param A       left operand;  texture x = inner dimension, y = Height
/// \param B       right operand; texture x = Width, y = inner dimension
/// \param result  receives the product; texture x = Width, y = Height
///
/// The function reports the problem with AfxMessageBox and returns without
/// drawing when: the inner dimensions differ, the texture target is
/// GL_TEXTURE_2D, the inner dimension is smaller than 1, or the generated
/// program exceeds ResourceLimitOnCurrentHW.FragmentProgram.MaxInstructionsNum.
///
/// NOTE: textures are addressed in texel units (RECT sampling), so the texture
///       size equals the matrix size and need not be a power of 2.
/// NOTE(review): the generated program never writes R3, R1.x or R2.y before
///       first use; it presumably relies on temporaries starting at zero --
///       confirm against the NV_fragment_program specification.
/// NOTE(review): only A's texture target is inspected; B and result are
///       presumably expected to use the same target -- verify against callers.
////////////////////////////////////////////////////////////////////////////////
void  clMatMatFP(const CDenseMatrixOnGPU& A, const CDenseMatrixOnGPU& B, CDenseMatrixOnGPU& result)
{
	int i;
	unsigned int ShaderID;

	//// Tex0   : A,      x = nPass, y = Height
	//// Tex1   : B,      x = Width, y = nPass
	//// Result : output, x = Width, y = Height
	unsigned int _iTextureTarget = A.GetTextureTarget();
	int Height = A.GetHeight();
	int Width  = B.GetWidth();
	int nPass  = A.GetWidth();		//// inner dimension = number of multiply-add groups
	if(nPass != B.GetHeight())
	{
		AfxMessageBox("For the reason of unmatched matrix size, it's wrong to muliply two matrices in clMatMatFP!");
		return;
	}
	if(_iTextureTarget == GL_TEXTURE_2D)
	{
		AfxMessageBox("Not support GL_TEXTURE_2D in current system(clMatMatFP) for the reason of float precision!");
		return;
	}
	//// The program text always contains the first multiply-add group, so an
	//// empty inner dimension would make the size computation below come out
	//// smaller than the text that is actually written.
	if(nPass < 1)
	{
		AfxMessageBox("Empty inner dimension, nothing to multiply in clMatMatFP!");
		return;
	}
	
	//// Fragment program pieces. The head fetches the first A/B texel pair,
	//// every step advances A along x and B along y by OneTexOffset and
	//// accumulates into R3, the tail writes R3 to the output colour.
	const char strStart[]= "!!FP1.0\n\
DECLARE OneTexOffset ={0, 0, 0, 0};\n\
MOV R1.y, f[TEX0];\n\
MOV R2.x, f[TEX1];\n\
TEX R4, R1, TEX0, RECT;\n\
TEX R5, R2, TEX1, RECT;\n\
MAD R3, R4, R5, R3;\n";

	const char strStepPart[]=	"ADD R1.x, R1, OneTexOffset.x;\n\
ADD R2.y, R2, OneTexOffset.y;\n\
TEX R4, R1, TEX0, RECT;\n\
TEX R5, R2, TEX1, RECT;\n\
MAD R3, R4, R5, R3;\n";

	const char strEndPart[]="MOV o[COLR], R3;\nEND\0";

	//// 5 instructions in the head, 5 per step, 1 in the tail
	const int nSteps = nPass - 1;
	const int nInstructions = nSteps*5 + 6;
	if(nInstructions > ResourceLimitOnCurrentHW.FragmentProgram.MaxInstructionsNum)
	{
		AfxMessageBox("Due to the instruction limitation, \
			this program can not do such big matrix-matrix multiplication in clMatMatFP!");
		return;
	}

	//// strlen (not sizeof) on purpose: strEndPart carries an explicit "\0"
	const size_t startLen  = strlen(strStart);
	const size_t stepLen   = strlen(strStepPart);
	const size_t endLen    = strlen(strEndPart);
	const size_t totalsize = startLen + stepLen*static_cast<size_t>(nSteps) + endLen;

	char *strShader = new char[totalsize + 1];
	if(strShader == NULL)
	{
		AfxMessageBox("Error to allocate memory for string shader in clMatMatFP!");
		return;
	}

	//// Assemble the program by appending at a moving tail pointer, which
	//// avoids rescanning the growing buffer for every step.
	char *tail = strShader;
	strcpy(tail, strStart);
	tail += startLen;
	for(i=0;i<nSteps;i++)
	{
		strcpy(tail, strStepPart);
		tail += stepLen;
	}
	strcpy(tail, strEndPart);

	LoadFragmentProgramFromString((const unsigned char*)strShader, ShaderID);

	//// allocated with new[], so it has to be released with delete[]
	delete[] strShader;

	unsigned char strTexOffset[] = "OneTexOffset";
	int len = strlen((const char*)strTexOffset);

	//// one texel along x (for A) and one along y (for B) per step
	float TexOffset[] = { 1.0, 1.0, 0.0, 0.0};

	glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
	glDisable(GL_DEPTH_TEST);
	glEnable(GL_FRAGMENT_PROGRAM_NV);
	
	/// Transfer the offset variables into the fragment program by names
	glProgramNamedParameter4fvNV(ShaderID,  len, strTexOffset, TexOffset);

	/// Bind the predefined fragment program object 
	glBindProgramNV(GL_FRAGMENT_PROGRAM_NV, ShaderID);

	glActiveTextureARB( GL_TEXTURE0_ARB );
	glBindTexture(_iTextureTarget, A.GetWholeTextureID());

	glActiveTextureARB( GL_TEXTURE1_ARB );
	glBindTexture(_iTextureTarget, B.GetWholeTextureID());
	
	//// One quad covering the result. The program reads only y from TEX0 and
	//// only x from TEX1, so the remaining coordinate of each set is unused.
	glBegin( GL_QUADS );			
		glMultiTexCoord2dARB(GL_TEXTURE0_ARB, 0, 0);    //// A
		glMultiTexCoord2dARB(GL_TEXTURE1_ARB, 0, 0);    //// B
		glVertex2i( 0, 0);

		glMultiTexCoord2dARB(GL_TEXTURE0_ARB, Width, 0);	 //// A
		glMultiTexCoord2dARB(GL_TEXTURE1_ARB, Width, 0);  //// B	
		glVertex2i( Width, 0);

		glMultiTexCoord2dARB(GL_TEXTURE0_ARB, Width, Height);  //// A
		glMultiTexCoord2dARB(GL_TEXTURE1_ARB, Width, Height);  //// B	
		glVertex2i( Width, Height);

		glMultiTexCoord2dARB(GL_TEXTURE0_ARB, 0, Height);  //// A
		glMultiTexCoord2dARB(GL_TEXTURE1_ARB, 0, Height);	 //// B
		glVertex2i( 0, Height);
	glEnd();


	//// Copy the Width x Height region at the framebuffer origin into the
	//// result texture (bound on the currently active unit, GL_TEXTURE1_ARB).
	glBindTexture(_iTextureTarget, result.GetWholeTextureID());
	glCopyTexSubImage2D(_iTextureTarget, 0, 0, 0, 0, 0, Width, Height);
	glDisable(GL_FRAGMENT_PROGRAM_NV);

	return;
}

////////////////////////////////////////////////////////////////////////////////
/// Matrix result(m*n) = Matrix A(m*l) * Matrix B(l*n)
/// Single-Channel method but in BlockNum blocks.
/// Here BlockNum blocks are blended together instead of using a temporary Texture
/// NOTE: texture---GL_TEXTURE_RECTANGLE_NV, SAME TO MATRIX DATA
/// TEXTURE IS NOT POWER OF 2
/// Support TEX_RECTANGLE
/// in Fragment Program Mode
////////////////////////////////////////////////////////////////////////////////
void  clMatMatBlockFP(const CDenseMatrixOnGPU& A, const CDenseMatrixOnGPU& B, CDenseMatrixOnGPU& result)
{
	int i;
	unsigned int ShaderID;

	//////////////////// Tex0-----Height*nPass---A--x=nPass,y=Height;
	//////////////////// Tex1-----nPass*Width---B--x=Width, y=nPass;
	//////////////////// Result---Height*Width-------x=Width, y=Height;
	//// Result Matrix's size
	unsigned int _iTextureTarget = A.GetTextureTarget();
	int Height = A.GetHeight();
	int Width  = B.GetWidth();
	int nPass  = A.GetWidth();
	if(nPass != B.GetHeight())
	{
		AfxMessageBox("For the reason of unmatched matrix size, it's wrong to muliply two matrices in clMatMatBlockFP!");
		return;
	}
	if(_iTextureTarget == GL_TEXTURE_2D)
	{
		AfxMessageBox("Not support GL_TEXTURE_2D in current system(clMatMatFP) for the reason of float precision!");
		return;
	}
	
	//// Here Generate the Fragment Program according to the actual matrix size
	char strStart[]= "!!FP1.0\n\
DECLARE OneTexOffset ={0, 0, 0, 0};\n\
MOV R1.y, f[TEX0];\n\
MOV R2.x, f[TEX1];\n\
MOV R1.x, OneTexOffset.w;\n\
MOV R2.y, OneTexOffset.w;\n\
TEX R4, R1, TEX0, RECT;\n\
TEX R5, R2, TEX1, RECT;\n\
MAD R3, R4, R5, R3;\n";

	char strStepPart[]=	"ADD R1.x, R1, OneTexOffset.x;\n\
ADD R2.y, R2, OneTexOffset.y;\n\
TEX R4, R1, TEX0, RECT;\n\
TEX R5, R2, TEX1, RECT;\n\
MAD R3, R4, R5, R3;\n";

	char strEndPart[]="TEX R4, f[TEX2], TEX2, RECT;\nADD o[COLR], R4, R3;\nEND\0";

	//////////////////////////////////////////////////////////////////////////////////
	///// Multi-Pass Process
	///// Here we change the process into A.GetWidth()/BlockNum passes
	///// In this way, we can compute bigger Matrix within limited instructions number
	int BlockNum = 2;
	
	//// Combine the whole Fragment Program together
	int nSteps = BlockNum - 1;
	int nInstructions = nSteps*5 + 9;
	if(nInstructions > ResourceLimitOnCurrentHW.FragmentProgram.MaxInstructionsNum)
	{
		AfxMessageBox("Due to the instruction limitation, \
			this program can not do such big matrix-matrix multiplication in clMatMatBlockFP!");
		return;
	}

	int totalsize = strlen(strStart) + 
			        strlen(strEndPart) + 
				    strlen(strStepPart)*nSteps;

	char *strShader = new char[totalsize*2];
	if( strShader == NULL)
	{
		AfxMessageBox("Error to allocate memory for shader string in clMatMat4ChannelFP!");
		return;
	}

	strcpy(strShader, strStart);	
	for(i=0;i<nSteps;i++)
	{
		strcat(strShader, strStepPart);
	}
	strcat(strShader, strEndPart);

	LoadFragmentProgramFromString((const unsigned char*)strShader, ShaderID);

	delete[] strShader;

	unsigned char strTexOffset[] = "OneTexOffset";

	int len = strlen((const char*)strTexOffset);

	float TexOffset[] = { 0.0, 0.0, 0.0, 0.0};

	///fOR THE REASON OF GL_RECTANGLE_NV;
	TexOffset[0] = 1.0;///(float)nPass;
	TexOffset[1] = 1.0;///(float)nPass;

	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	glDisable(GL_DEPTH_TEST);


	glEnable(GL_FRAGMENT_PROGRAM_NV);
	
	glActiveTextureARB( GL_TEXTURE0_ARB );
	glBindTexture(_iTextureTarget, A.GetWholeTextureID());

	glActiveTextureARB( GL_TEXTURE1_ARB );
	glBindTexture(_iTextureTarget, B.GetWholeTextureID());

	glActiveTextureARB( GL_TEXTURE2_ARB );
	glBindTexture(_iTextureTarget, result.GetWholeTextureID());
	
	glBindProgramNV(GL_FRAGMENT_PROGRAM_NV, ShaderID);
	for(i=0; i<nPass; i+=BlockNum)
	{
		///fOR THE REASON OF GL_RECTANGLE_NV;
		TexOffset[3] = (float)i;
		glProgramNamedParameter4fvNV(ShaderID,  len, strTexOffset, TexOffset);

		glBegin( GL_QUADS );			
			glMultiTexCoord2dARB(GL_TEXTURE0_ARB, 0, 0);    //// A
			glMultiTexCoord2dARB(GL_TEXTURE1_ARB, 0, 0);    //// B
			glMultiTexCoord2dARB(GL_TEXTURE2_ARB, 0, 0);    //// TEMP
			glVertex2i( 0, 0);

			glMultiTexCoord2dARB(GL_TEXTURE0_ARB, Width, 0);  //// A
			glMultiTexCoord2dARB(GL_TEXTURE1_ARB, Width, 0);  //// B	
			glMultiTexCoord2dARB(GL_TEXTURE2_ARB, Width, 0);  //// TEMP	
			glVertex2i( Width, 0);

			glMultiTexCoord2dARB(GL_TEXTURE0_ARB, Width, Height);  //// A
			glMultiTexCoord2dARB(GL_TEXTURE1_ARB, Width, Height);  //// B	
			glMultiTexCoord2dARB(GL_TEXTURE2_ARB, Width, Height);  //// TEMP	
			glVertex2i( Width, Height);

			glMultiTexCoord2dARB(GL_TEXTURE0_ARB, 0, Height);  //// A
			glMultiTexCoord2dARB(GL_TEXTURE1_ARB, 0, Height);  //// B
			glMultiTexCoord2dARB(GL_TEXTURE2_ARB, 0, Height);  //// TEMP
			glVertex2i( 0, Height);
		glEnd();

		////save the immediate texture result into 
		glBindTexture(_iTextureTarget, result.GetWholeTextureID());
		glCopyTexSubImage2D(_iTextureTarget, 0, 0, 0, 0, 0, Width, Height);

		glActiveTextureARB( GL_TEXTURE2_ARB );
		glBindTexture(_iTextureTarget, result.GetWholeTextureID());
	}
	glDisable(GL_BLEND);

	////save the immediate texture result into 
	glBindTexture(_iTextureTarget, result.GetWholeTextureID());
	glCopyTexSubImage2D(_iTextureTarget, 0, 0, 0, 0, 0, Width, Height);
	glDisable(GL_FRAGMENT_PROGRAM_NV);

	return;
}

////////////////////////////////////////////////////////////////////////////////
/// Matrix result(m*n) = Matrix A(m*l) * Matrix B(l*n)
/// 4-Channel method
/// ----------Note: The Texture should be changed 
/// TEXTURE should be even value, in this way the matrix can be organzied into 
/// 4 blocks
/// Support TEX_RECTANGLE
/// in Fragment Program Mode
////////////////////////////////////////////////////////////////////////////////
void  clMatMat4ChannelFP(const CDenseMatrixOnGPU& A, const CDenseMatrixOnGPU& B, CDenseMatrixOnGPU& result)
{
	int i;
	unsigned int ShaderID;

	//////////////////// Tex0-----DimY*nPass---A--x=nPass,y=Height;
	//////////////////// Tex1-----nPass*DimX---B--x=Width, y=nPass;
	//////////////////// Result---Height*Width-------x=Width, y=Height;
	//// Result Matrix's size
	unsigned int _iTextureTarget = A.GetTextureTarget();
	int Height = A.GetHeight();
	int Width  = B.GetWidth();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -