⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 matrixmultexture_kernel.cu

📁 基于GPU的并行编程
💻 CU
字号:
#include <cstdio>

#define BLOCK_SIZE 16


// File-scope 2D texture references through which MatMulTexture_kernel reads
// its two operands. Both hold float texels and use cudaReadModeElementType,
// so a fetch returns the stored value unchanged. MatMulTexture() binds them
// to CUDA arrays before the launch and unbinds them afterwards.
// NOTE(review): texture references are a legacy API (removed in CUDA 12);
// migrating to texture objects would be a separate, interface-changing task.
texture<float,2,cudaReadModeElementType> texMatrixA;
texture<float,2,cudaReadModeElementType> texMatrixB;

/*
 * MatMulTexture_kernel: each thread computes one element of C = A * B,
 * reading both operands through the texture references texMatrixA and
 * texMatrixB.
 *
 * Launch layout: 2D grid of 2D blocks; the x index selects the column of C
 * (0..wB-1) and the y index selects the row of C (0..hA-1). The grid may be
 * larger than the matrix; surplus threads exit without touching memory.
 *
 * Data layout (fixed by how MatMulTexture() shapes and fills the arrays):
 *   texMatrixA texel (x=i, y=k) is A[k*hA+i]  -> element (row i, col k) of A
 *   texMatrixB texel (x=k, y=j) is B[j*wA+k]  -> element (row k, col j) of B
 * i.e. the matrices are stored column-major, and C is written the same way:
 * element (row i, col j) goes to C[j*hA+i].
 *
 * C   device buffer receiving hA*wB floats
 * hA  number of rows of A and of C
 * wA  number of columns of A (= number of rows of B)
 * wB  number of columns of B and of C
 */
__global__ void MatMulTexture_kernel(float *C,int hA,int wA,int wB)
{
	const int idx=blockIdx.x*blockDim.x+threadIdx.x;	// column of C
	const int idy=blockIdx.y*blockDim.y+threadIdx.y;	// row of C

	// The grid is rounded up to whole blocks, so threads past the matrix
	// edge must not write.
	if(idx>=wB||idy>=hA)
		return;

	float temp=0.0f;
	for(int k=0;k<wA;k++)
		temp+=tex2D(texMatrixA,idy,k)*tex2D(texMatrixB,k,idx);

	// Column stride of C is its row count hA; widen before multiplying so
	// large matrices do not overflow int.
	C[(size_t)idx*hA+idy]=temp;
}

// Reports a failed CUDA runtime call on stderr; returns true only on success.
static bool matMulTextureOk(cudaError_t err,const char *what)
{
	if(err==cudaSuccess)
		return true;
	fprintf(stderr,"MatMulTexture: %s failed: %s\n",what,cudaGetErrorString(err));
	return false;
}

/*
 * MatMulTexture: host wrapper that computes C = A * B on the GPU using
 * texture fetches for the operands.
 *
 * A   host buffer, hA*wA floats, column-major (element (i,k) at A[k*hA+i])
 * B   host buffer, wA*wB floats, column-major (element (k,j) at B[j*wA+k])
 * C   host buffer receiving hA*wB floats, column-major (element (i,j) at C[j*hA+i])
 * hA, wA, wB  matrix dimensions; they need not be multiples of BLOCK_SIZE.
 *
 * NOTE(review): for square inputs, callers holding row-major buffers see this
 * as the row-major product B * A; confirm the convention the callers expect.
 *
 * If hA or wB is not positive there is nothing to compute and C is left
 * untouched. If wA is not positive every dot product is empty, so C is
 * filled with zeros. When any CUDA call fails, the failure is reported on
 * stderr, the remaining steps are skipped, all device resources acquired so
 * far are released, and the contents of C are unspecified.
 */
void MatMulTexture(float *A,float *B,float *C,int hA,int wA,int wB)
{
	if(hA<=0||wB<=0)
		return;

	// Sizes are computed in size_t so that large matrices do not overflow int.
	const size_t elemsC=(size_t)hA*wB;
	if(wA<=0)
	{
		for(size_t i=0;i<elemsC;i++)
			C[i]=0.0f;
		return;
	}
	const size_t bytesA=(size_t)hA*wA*sizeof(float);
	const size_t bytesB=(size_t)wA*wB*sizeof(float);
	const size_t bytesC=elemsC*sizeof(float);

	float *d_C=NULL;
	cudaArray *cuArray_A=NULL,*cuArray_B=NULL;
	cudaChannelFormatDesc chDesc=cudaCreateChannelDesc<float>();

	// Array width is the fastest-varying dimension of the source buffer:
	// hA for A and wA for B (column-major storage).
	const bool staged=
		matMulTextureOk(cudaMalloc((void**)&d_C,bytesC),"cudaMalloc(C)")&&
		matMulTextureOk(cudaMallocArray(&cuArray_A,&chDesc,hA,wA),"cudaMallocArray(A)")&&
		matMulTextureOk(cudaMallocArray(&cuArray_B,&chDesc,wA,wB),"cudaMallocArray(B)")&&
		matMulTextureOk(cudaMemcpyToArray(cuArray_A,0,0,A,bytesA,cudaMemcpyHostToDevice),"cudaMemcpyToArray(A)")&&
		matMulTextureOk(cudaMemcpyToArray(cuArray_B,0,0,B,bytesB,cudaMemcpyHostToDevice),"cudaMemcpyToArray(B)");

	// Track each binding separately so only textures that were actually
	// bound get unbound during cleanup.
	const bool boundA=staged&&
		matMulTextureOk(cudaBindTextureToArray(texMatrixA,cuArray_A),"cudaBindTextureToArray(A)");
	const bool boundB=boundA&&
		matMulTextureOk(cudaBindTextureToArray(texMatrixB,cuArray_B),"cudaBindTextureToArray(B)");

	if(boundB)
	{
		dim3 block(BLOCK_SIZE,BLOCK_SIZE);
		// Round up so partial tiles at the right and bottom edges are covered.
		dim3 grid((wB+BLOCK_SIZE-1)/BLOCK_SIZE,(hA+BLOCK_SIZE-1)/BLOCK_SIZE);

		MatMulTexture_kernel<<<grid,block>>>(d_C,hA,wA,wB);

		// cudaGetLastError catches a bad launch configuration; the blocking
		// cudaMemcpy waits for the kernel and surfaces execution errors.
		if(matMulTextureOk(cudaGetLastError(),"kernel launch"))
			matMulTextureOk(cudaMemcpy(C,d_C,bytesC,cudaMemcpyDeviceToHost),"cudaMemcpy(C)");
	}

	if(boundA)
		cudaUnbindTexture(texMatrixA);
	if(boundB)
		cudaUnbindTexture(texMatrixB);

	// Freeing a null pointer/array is a no-op, so this is safe after a
	// partial failure.
	cudaFree(d_C);
	cudaFreeArray(cuArray_A);
	cudaFreeArray(cuArray_B);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -