📄 linearequationsongpu.cpp
字号:
// LinearEquationsOnGPU.cpp: implementation of the LinearEquationsOnGPU class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "shadow.h"
#include "LinearEquationsOnGPU.h"
#include "CommonHW.h"
#include "DenseMatrixOnGPU.h"
#include "VectorOnGPU.h"
#include "DenseMatrixMatrixOperation.h"
#include "DenseMatrixVectorMultiply.h"
#include "VectorVectorOperation.h"
#include "SparseMatrixOnGPU1.h"
#include "SparseMatrixVectorMultiply1.h"
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
/// *****************THIS PROGRAM JUST FOR WU WEN**************************
//////////////////////////////////////////////////////////////////////
/// Total Object---------------------Graphics Hardware
/// Linear Equations Solver on GPU
/// Ax = b ----> solve x
/// Matrix----M[Dim][Dim]
/// Output Vector---x[Dim]
//////////////////////////////////////////////////////////////////////
void ConjugateGradientSolverOnGPU(float *M, float *x, float *b, int Dim)
{
///// Tolerance of Iterations Error
float ToL = 1.0e-3;
///// Maximum number of Iterations
int i, MaxIterNum = 100;
float ro, ai, bi;
//////// Data prepare
//////// Input the data from outside
CDenseMatrixOnGPU inputM;
inputM.SetData(M, Dim);
CVectorOnGPU inputB;
CVectorOnGPU inputX;
CVectorOnGPU inputR;
inputB.SetData(b, Dim);
inputX.SetData(NULL, Dim);
inputR.SetData(NULL, Dim);
/// Just a temp vector here
CVectorOnGPU inputT;
CVectorOnGPU inputP;
CVectorOnGPU inputQ;
inputT.SetData(NULL, Dim);
inputP.SetData(NULL, Dim);
inputQ.SetData(NULL, Dim);
//////// Iterative procedure
// InputT = inputM*inputX
DenseMatVecFP(inputM, inputX, inputT);
// InputR = inputB - inputT
clVecOpFP(CL_VECT_SUB_VECT_RECT, 1.0, 1.0, inputB, inputT, inputR);
// InputP = inputR
clVecOpFP(CL_VECT_ADD_VECT_RECT, 1.0, 0.0, inputR, inputR, inputP);
///// Here just allow 1000 times interations
for(i=0; i<MaxIterNum; i++)
{
// inputR.inputR
ro = clInnerProductFP(inputR, inputR);
// InputQ = inputM*inputP
DenseMatVecFP(inputM, inputP, inputQ);
// inputP.inputQ
ai = clInnerProductFP(inputP, inputQ);
ai = ro/ai;
// InputX = inputX + ai*inputP
clVecOpFP(CL_VECT_ADD_VECT_RECT, 1.0, ai, inputX, inputP, inputX);
// InputR = inputR - ai*inputQ
clVecOpFP(CL_VECT_SUB_VECT_RECT, 1.0, ai, inputR, inputQ, inputR);
// inputR.inputR
bi = clInnerProductFP(inputR, inputR);
if(bi < ToL)
break;
bi = bi/ro;
// InputP = inputR + bi*inputP
clVecOpFP(CL_VECT_ADD_VECT_RECT, 1.0, bi, inputR, inputP, inputP);
///////Convergence check HERE
//fprintf(fp,"\nIter %d: ro=%f,ai=%f,bi=%f\n", i, ro, ai, bi);
}
//////// Transfer the result from GPU to CPU main memory
float *result = inputX.GetData(Dim);
memcpy(x, result, sizeof(float)*Dim);
delete[] result;
result = NULL;
return;
}
//////////////////////////////////////////////////////////////////////
/// Jacobi Solver on GPU
//////////////////////////////////////////////////////////////////////
void JacobiSolverOnGPU(float *M, float *x, float *b, int Dim)
{
///// Tolerance of Iterations Error
float ToL = 1.0e-3;
///// Maximum number of Iterations
int MaxIterNum = 1000;
int i,k;
///// Diagonal Elements of M
float *D = new float[Dim];
///// Left & Right Elements of M
float *LR = new float[Dim*Dim];
memset(D, 0, sizeof(float)*Dim);
memcpy(LR, M, sizeof(float)*Dim*Dim);
for(i=0; i<Dim; i++)
{
k = i*Dim + i;
D[i] = 1.0f/M[k];
LR[k] = 0.0;
}
//////// Data prepare
//////// Input the data from outside
CDenseMatrixOnGPU inputM;
inputM.SetData(LR, Dim);
CVectorOnGPU inputB;
CVectorOnGPU inputX;
CVectorOnGPU inputD;
CVectorOnGPU inputT;
inputB.SetData(b, Dim);
inputD.SetData(D, Dim);
inputX.SetData(NULL, Dim);
inputT.SetData(NULL, Dim);
float Tolerance;
for(k=0; k<MaxIterNum; k++)
{
// Record the previous X -----> InputT = old inputX
clVecOpFP(CL_VECT_ADD_VECT_RECT, 1.0, 0.0, inputX, inputX, inputT);
// InputX = inputM*inputX = LR*x
DenseMatVecFP(inputM, inputX, inputX);
// InputX = inputB - inputX
clVecOpFP(CL_VECT_SUB_VECT_RECT, 1.0, 1.0, inputB, inputX, inputX);
// InputX = inputD * inputX
clVecOpFP(CL_VECT_MULT_VECT_RECT, 1.0, 1.0, inputD, inputX, inputX);
// compute the tolerance by InputT = inputX - inputT
clVecOpFP(CL_VECT_SUB_VECT_RECT, 1.0, 1.0, inputX, inputT, inputT);
// inputT.inputT
Tolerance = clInnerProductFP(inputT, inputT);
TRACE("Tolerance = %f\n",Tolerance);
if(Tolerance < ToL)
{
break;
}
}
TRACE("Max Iterations Num = %d\n", k);
delete[] D;
D = NULL;
delete[] LR;
LR = NULL;
//////// Transfer the result from GPU to CPU main memory
float *result = inputX.GetData(Dim);
memcpy(x, result, sizeof(float)*Dim);
delete[] result;
result = NULL;
return;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -