📄 conjugate_gradient.br
字号:
/* Conjugate Gradient (CG) Solver for Ax = b. The conjugate method is used to solve large systems of equations where A is sparse and symmetric positive definite. For a description of the technique, see "Matrix Computations", by Golub and Van Loan. Note: see the example app spMatrixVec for a description of how the sparse matrix A is represented for streaming, and how the matrix-vector product is performed. The program takes two parameters 'it_max' and 'eps'. it_max: maximum number of iterations of solver eps: solution convergence threshold*/#include <stdio.h>#include <stdlib.h>#define MAX_NZ_PER_ROW 7#define NUM_ROWS 200#define MAX_NZ (NUM_ROWS*MAX_NZ_PER_ROW)/*** Kernels needed for Sparse Matrix-Vector multiplication ****************/kernel void mult(float a<>, float b<>, out float c<>) { c = a*b;}kernel void sparse_matmult_product( float index<>, float x[][], float A<>, out float result<> ){ result = x[index][0] * A;}/*** Kernels called in the CG iterations *************************************/kernel void square(float a<>, out float b<>) { b = a*a;}kernel void scaleAdd(float a<>, float b<>, float scale, out float c<>) { c = a + (scale*b);}kernel void subtract(float a<>, float b<>, out float c<>) { c = a - b;}kernel void copy(float a<>, out float b<>) { b = a;}reduce void sum(float a<>, reduce float result<>) { result += a;}/**********************************************************************/// generates a matrix resulting from the discretation of the poisson operator// on a uniform grid of dimension 'dim' and grdi resolution 'size'. This matrix// is very sparse and symmetric positive definite and thus can be solved efficiently// using the CG methodvoid createCSRFromPoisson(int size, int dim, float nz[MAX_NZ], int cols[MAX_NZ], int rowStart[NUM_ROWS+1]) { int nnzPerRow = 0, matSize = 0; int i=0,j=0; int offsets[7][2]; int nnz = 0; int idx = 0; switch (dim) { case 1: nnzPerRow = 3; matSize = size; offsets[0][0] = -1; offsets[0][1] = -1; offsets[1][0] = 0; offsets[1][1] = 2; offsets[2][0] = 1; offsets[2][1] = -1; break; case 2: nnzPerRow = 5; matSize = size*size; offsets[0][0] = -size; offsets[0][1] = -1; offsets[1][0] = -1; offsets[1][1] = -1; offsets[2][0] = 0; offsets[2][1] = 4; offsets[3][0] = 1; offsets[3][1] = -1; offsets[4][0] = size; offsets[4][1] = -1; break; case 3: nnzPerRow = 7; matSize = size*size*size; offsets[0][0] = -size*size; offsets[0][1] = -1; offsets[1][0] = -size; offsets[1][1] = -1; offsets[2][0] = -1; offsets[2][1] = -1; offsets[3][0] = 0; offsets[3][1] = 6; offsets[4][0] = 1; offsets[4][1] = -1; offsets[5][0] = size; offsets[5][1] = -1; offsets[6][0] = size*size; offsets[6][1] = -1; break; default: printf("Error in createCSRFromPoisson()\n"); return; } for (i=0;i<matSize;i++) { for (j=0;j<nnzPerRow;j++) { int col = i+offsets[j][0]; if (col >= 0 && col < matSize) nnz++; } } rowStart[0] = 0; for (i=0;i<matSize;i++) { for (j=0;j<nnzPerRow;j++) { int col = i+offsets[j][0]; if (col>= 0 && col < matSize) { nz[idx] = (float)offsets[j][1]; cols[idx] = col; idx++; } } rowStart[i+1] = idx; }}/// The data is generated in compressed sparse row (CSR) format.// This converts the CSR format into the padded ITPACK// representation that is easy to stream.void reshuffleData(float nz[MAX_NZ], int cols[MAX_NZ], int rowStart[NUM_ROWS+1], float Anz[MAX_NZ], float Acols[MAX_NZ]) { int i,j; for (i=0;i<NUM_ROWS;i++) { int offset = 0; for (j=rowStart[i];j<rowStart[i+1];j++) { Anz[MAX_NZ_PER_ROW*i + offset] = nz[j]; Acols[MAX_NZ_PER_ROW*i + offset] = (float)cols[j]; offset++; } // must pad the rest of the row while (offset < MAX_NZ_PER_ROW) { Anz[MAX_NZ_PER_ROW*i + offset] = 0.0f; Acols[MAX_NZ_PER_ROW*i + offset] = (float)0.0f; // this should be an invalid index.... but doesn't have to be since x multiplied by a zero here offset++; } }}int parseCmdLine(int argc, char** argv, char* filename, int* it_max, float* eps) { if (argc < 3) { printf("it_max and eps arguments.\n"); return 1; } *it_max = atoi(argv[1]); *eps = (float)atof(argv[2]); return 0;}int main(int argc, char** argv) { int i; char filename[80]; int it, it_max; float eps; float alpha, resNew, resOld, convergeThresh; float Anz[MAX_NZ]; float cIdx[MAX_NZ]; float x[NUM_ROWS]; float b[NUM_ROWS]; float AStrm<NUM_ROWS,MAX_NZ_PER_ROW>; float cIdxStrm<NUM_ROWS,MAX_NZ_PER_ROW>; float productsStrm<NUM_ROWS,MAX_NZ_PER_ROW>; float vecGatherStrm<NUM_ROWS,MAX_NZ_PER_ROW>; float xStrm<NUM_ROWS,1>; // solution vector float bStrm<NUM_ROWS,1>; // rhs vector // required temporaries of the CG method float rStrm<NUM_ROWS,1>; float qStrm<NUM_ROWS,1>; float dStrm<NUM_ROWS,1>; float tmpStrm<NUM_ROWS,1>; // compressed sparse column representation of A, will be converted to ITPACK format // for streaming float csrNz[MAX_NZ]; int csrCols[MAX_NZ]; int csrRowStart[NUM_ROWS+1]; if (parseCmdLine(argc, argv, filename, &it_max, &eps)) return 1; // generate a matrix createCSRFromPoisson(NUM_ROWS, 1, csrNz, csrCols, csrRowStart); // convert from pure CSR to a padded scheme better suited for streaming reshuffleData(csrNz, csrCols, csrRowStart, Anz, cIdx); // read in the matrix data streamRead(AStrm, Anz); streamRead(cIdxStrm, cIdx); // Now do the CG solve... printf("\n\n"); // initial guess for x is zero. Hard coding some right hand side for b for (i=0;i<NUM_ROWS;i++) { x[i] = 0.0f; b[i] = 50.0f; } streamRead(xStrm, x); streamRead(bStrm, b); // Ax_0 // these two lines constitute a sparse matrix-vector multiply sparse_matmult_product( cIdxStrm, xStrm, AStrm, productsStrm ); sum( productsStrm, rStrm ); // r = b - Ax subtract(bStrm, rStrm, rStrm); // d = r copy(rStrm, dStrm); // resNormNew = r dot r resNew = 0.0f; square(rStrm, tmpStrm); sum(tmpStrm, resNew); convergeThresh = eps * eps * resNew; printf("initial r norm = %f\n", resNew); printf("max it = %d\n", it_max); printf("eps = %f\n", eps); it = 0; // CG loop. Note, for simplicty the convergence is checked after after // each loop iteration. However, in a practical implementation, multiple // iterations should be run on the hardware before reading back the residual // to check performance. while (it < it_max && resNew > convergeThresh ) { if (it > 0) scaleAdd(rStrm, dStrm, resNew / resOld, dStrm); // q = Ad // these two lines constitute a sparse matrix-vector multiply sparse_matmult_product( cIdxStrm, dStrm, AStrm, productsStrm ); sum( productsStrm, qStrm ); // alpha = d dot q mult(dStrm, qStrm, tmpStrm); sum(tmpStrm, alpha); alpha = resNew / alpha; // x = x + alpha * d scaleAdd(xStrm, dStrm, alpha, xStrm); // r = r - alpha * q scaleAdd(rStrm, qStrm, -1.0f * alpha, rStrm); // resNew = r dot r resOld = resNew; square(rStrm, tmpStrm); sum(tmpStrm, resNew); it++; printf("iteration %d. Current residual norm: %f\n", it, resNew); } streamWrite(xStrm, x); printf("\n"); printf("Solution x:\n"); for (i=0;i<NUM_ROWS;i++) printf("%.3f " ,x[i]); printf("\n"); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -