⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 densematrixvectormultiply.cpp

📁 PDE simulator on GPU.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
		pbuffer[i]->EndCapture();
	}
*/
	return;
}

//////////////////////////////////////////////////////////////////////////////
//// New version based on GL_NV_fragment_program
//// Modified Date: 2004/1/7
//// Vector(result) = Matrix(A) * Vector(x)
//// Matrix A -----> general square matrix
//// Only TEX_RECTANGLE is supported
//// Vector x is stored in a single channel,
//// BUT Ai, Ai+1, Ai+2, Ai+3 ---> packed in RGBA mode, stored in ONE texel
//// This version also uses the maximum number of multi-texture units
//////////////////////////////////////////////////////////////////////////////
//// Multi-pass matrix-vector product driven by a generated NV fragment program.
////   A      : matrix operand; its textures are bound to units 2, 3, ...
////   x      : vector operand; its texture is bound to unit 0
////   result : output vector; its texture is bound to unit 1 and zeroed first
//// The function shows a message box and returns without drawing when
////   - A.GetDim() differs from x.GetVectorLength(),
////   - x uses GL_TEXTURE_2D,
////   - fewer than one texture unit is left for A after reserving two, or
////   - the generated program exceeds the fragment-program instruction limit.
//// NOTE(review): a new program is built on every call and ShaderID is never
//// released in this function -- confirm whether LoadFragmentProgramFromString
//// or another owner takes care of that.
void  MultiDenseMatVecMultiTexFP(const CDenseMatrixOnGPU& A, const CVectorOnGPU& x, CVectorOnGPU& result)
{
	////////////////////////////////////////////
	///If the sizes are incompatible, report the error and give up
	int i, j, nLen = A.GetDim();
	if(nLen != x.GetVectorLength())
	{
		AfxMessageBox("Error to Operate on Matrix-Vector Multiply for different size!");
		return;
	}
	//////////////////////////////////////////////////////////////////////////
	unsigned int _iTextureTarget = x.GetTextureTarget();
	if(_iTextureTarget == GL_TEXTURE_2D)
	{
		AfxMessageBox("Not support GL_TEXTURE_2D in current system(MultiDenseMatVecFP) for the reason of float precision!");
		return;
	}
	int Width  = x.GetWidth();
	int Height = x.GetHeight(); 

	/// Two units are reserved (unit 0 for x, unit 1 for result); the rest carry A
	int maxtexnum = ResourceLimitOnCurrentHW.MaxTexUnitsNum - 2;
	if(maxtexnum < 1)
	{
		/// Without this guard the repeat count below would be negative and
		/// the pass count would divide by zero
		AfxMessageBox("Not enough texture units for MultiDenseMatVecMultiTexFP!");
		return;
	}

	////////////////////////////////////////////
	///Ax---->Matrix-Vector Product
	unsigned int ShaderID = 0;
	{
		/// First part: kills fragments whose linear index (y*TexPARA.x + x) is
		/// >= TexPARA.z, fetches the running sum from TEX1 and one texel of A
		/// from TEX2, then accumulates four products. For each product the
		/// lookup position in TEX0 is (index + PassNo.x + k) modulo TexPARA.z,
		/// converted back to 2D coordinates with TexPARA.x / TexPARA.y.
		/// This part holds 50 instructions.
		const char ShaderStrFirstPart[]="!!FP1.0\n\
DECLARE TexPARA ={0, 0, 0, 0};\n\
DECLARE PassNo ={0, 0, 0, 0};\n\
FLR R1, f[WPOS];\n\
MAD R0.x, R1.y, TexPARA.x, R1.x;\n\
SUBC R1.x, R0.x, TexPARA.z;\n\
KIL GE.x;\n\
TEX R2, f[WPOS], TEX1, RECT;\n\
TEX R3, f[WPOS], TEX2, RECT;\n\
ADD R0.x, R0.x, PassNo.x;\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.x, R2;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.y, R5;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.z, R5;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.w, R5;";

		/// Following part: the same four accumulation steps for one more
		/// texel of A (44 instructions). It is emitted once per additional
		/// texture unit, each time preceded by a generated TEX line.
		const char ShaderStrFollowingPart[]="\nADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.x, R5;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.y, R5;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.z, R5;\n\
ADD R0.x, R0.x, {1};\n\
MUL R1.x, R0.x, TexPARA.w;\n\
FLR R1.x, R1.x;\n\
MUL R1.x, R1.x, TexPARA.z;\n\
SUB R6.x, R0.x, R1.x;\n\
MUL R1.y, R6.x, TexPARA.y;\n\
FLR R1.y, R1.y;\n\
MUL R1.x, R1.y, TexPARA.x;\n\
SUB R1.x, R6.x, R1.x;\n\
TEX R4, R1, TEX0, RECT;\n\
MAD R5, R4, R3.w, R5;";

		const char ShaderStrEndPart[] ="\nMOV o[COLR], R5;\nEND\0";
		char OneLine[100];

		/// Real instruction count of the composed program:
		/// 50 (first part) + (1 TEX + 44) per repeated part + 1 (final MOV)
		int  maxintr = 50 + (44 + 1)*(maxtexnum-1) + 1;
		if(maxintr > ResourceLimitOnCurrentHW.FragmentProgram.MaxInstructionsNum)
		{
			AfxMessageBox("The instructions number is too big!");
			return;
		}

		/// Upper bound on the composed text. OneLine has not been written yet,
		/// so its capacity (not its current contents) bounds each generated
		/// TEX line; the trailing +1 is for the terminating NUL.
		const size_t nRepeat   = static_cast<size_t>(maxtexnum-1);
		const size_t totalsize = strlen(ShaderStrFirstPart) +
								 strlen(ShaderStrEndPart) +
								(strlen(ShaderStrFollowingPart) + sizeof(OneLine))*nRepeat +
								 1;
		char* ShaderStr = new char[totalsize];
		if(ShaderStr == NULL)
		{
			/// Only reachable on compilers whose operator new returns NULL
			AfxMessageBox("Error to allocate memory for shader string iN MultiDenseMatVecMultiTexFP!");
			return;
		}

		/////compose all the parts into one string shader
		strcpy(ShaderStr, ShaderStrFirstPart);
		for(i=0; i<maxtexnum-1; i++)
		{
			/// TEX2 is already fetched by the first part, so continue at TEX3
			sprintf(OneLine, "\nTEX R3, f[WPOS], TEX%d, RECT;\0", i+3);
			strcat(ShaderStr, OneLine);
			strcat(ShaderStr, ShaderStrFollowingPart);
		}
		strcat(ShaderStr, ShaderStrEndPart);

		//////////////////////////////////////////////////////////////////////////
		LoadFragmentProgramFromString((const unsigned char*)ShaderStr, ShaderID);

		delete[] ShaderStr;
	}


	//////////////////////////////////////////////
	///// Clear the framebuffer firstly to avoid the previous computation
	///// This step is important to avoid computation errors
	//glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);	
	//glDisable(GL_DEPTH_TEST);

	/// TexPARA = (Width, 1/Width, nLen, 1/nLen), read by the program above
	unsigned char strTexPara[] = "TexPARA";
	int len1 = static_cast<int>(strlen((const char*)strTexPara));
	float TexelPara[4];
	TexelPara[0] = (float)Width;
	TexelPara[1] = (float)(1.0/Width);

	TexelPara[2] = (float)nLen;
	TexelPara[3] = (float)(1.0/nLen);
	
	/// Only PassNo.x is read by the program, but all four floats are handed
	/// to GL, so the remaining components are zeroed instead of left indeterminate
	unsigned char strPassNo[] = "PassNo";
	int len2 = static_cast<int>(strlen((const char*)strPassNo));
	float PassNo[4] = {0.0f, 0.0f, 0.0f, 0.0f};
	
	////##################################################################
	/// Set up both pbuffers: clear, enable and bind the program, upload
	/// TexPARA, bind x to unit 0 and result to unit 1, and zero result
	for(i=0;i<2;i++)
	{
		pbuffer[i]->BeginCapture();
		glClear(GL_COLOR_BUFFER_BIT);	

		glEnable(GL_FRAGMENT_PROGRAM_NV);
		glBindProgramNV(GL_FRAGMENT_PROGRAM_NV, ShaderID);
		glProgramNamedParameter4fvNV(ShaderID,  len1, strTexPara, TexelPara);

		//////////////////////////////////////////////
		///x
		glActiveTextureARB(GL_TEXTURE0_ARB);
		glBindTexture(_iTextureTarget, x.GetTextureID());	

		glActiveTextureARB(GL_TEXTURE1_ARB);
		glBindTexture(_iTextureTarget, result.GetTextureID());
		result.SetZero();
		pbuffer[i]->EndCapture();
	}

	//////////////////////////////////////////////////////////////////////////
	//// Adjust the texture numbers according to current HW
	//// Here 4 means RGBA 4 channels---Ai,Ai+1,Ai+2,Ai+3
	//// Why ceil====>because the fragment program is expected to treat the
	//// missing entries as zero
	int nPass = (int)ceil((float)nLen/(4.0*maxtexnum));
	int WhichPBuffer;
	///Following: Tex_X*Tex_A0 + Tex_X*Tex_A1 + ... + Tex_X*Tex_An-1 
	for(i=0; i<nPass; i++)
	{	
		/// Passes alternate between the two pbuffers
		WhichPBuffer = i%2;
		////##################################################################
		pbuffer[WhichPBuffer]->BeginCapture();
		//////////////////////////////////////////////
		///A[i]---A[i+14] if HW support 16 multi-texture
		/// NOTE(review): in the last pass i*maxtexnum+j may run past the
		/// textures A actually holds; what GetTextureID returns then is not
		/// visible here -- confirm.
		for(j=0; j<maxtexnum; j++)
		{
			glActiveTextureARB(GL_TEXTURE2_ARB + j );
			glBindTexture(_iTextureTarget, A.GetTextureID(i*maxtexnum+j));
		}
		
		/// Transfer the offset variables into the fragment program by names
		/// To Lookup the correct texture position in Texture x
		PassNo[0] = (float)(i*4*maxtexnum);
		glProgramNamedParameter4fvNV(ShaderID,  len2, strPassNo, PassNo);
		
		/// One quad covering the whole Width x Height vector texture
		glBegin( GL_QUADS );
			glVertex2i( 0, 0 );
			glVertex2i( Width, 0 );
			glVertex2i( Width, Height );
			glVertex2i( 0, Height );
		glEnd();

		// Now we need to copy the resulting pixels into the output
		// save the result texture into result
		// NOTE(review): with the glActiveTextureARB call below disabled, this
		// bind lands on the last unit selected in the loop above -- confirm
		// that this is what EndCapture expects.
		//glActiveTextureARB(GL_TEXTURE1_ARB);
		glBindTexture(_iTextureTarget, result.GetTextureID());
		//glCopyTexSubImage2D(_iTextureTarget, 0, 0, 0, 0, 0, Width, Height);
		pbuffer[WhichPBuffer]->EndCapture();
	}
/*
	for(i=0;i<2;i++)
	{
		pbuffer[i]->BeginCapture();
		glDisable(GL_FRAGMENT_PROGRAM_NV);
		pbuffer[i]->EndCapture();
	}
*/
    return;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -