fft2d_gpu.cpp

来自「using NVIDIA FFT CUDA Library to solve F」· C++ 代码 · 共 90 行

CPP
90
字号
/*  icc -c fft2d_gpu.cpp -I/usr/local/cuda/include -I/opt/NVIDIA_CUDA_SDK/common/inc -L/usr/local/cuda/lib -lcuda -lcufft -L/opt/NVIDIA_CUDA_SDK/lib -lcutil -lm*/     #include <stdlib.h>     #include <stdio.h>     #include <math.h>     #include <cuda_runtime.h>     #include <cufft.h>     #include <cutil.h>     #include <math_constants.h>extern "C"{extern void fft2d_gpu_( int *nx, int *ny, int *isign, float data[], int *istep){      cufftHandle plan;     cufftComplex *in, *devdata, *out;     int i,j,k,ne_l;     int argc;     char** argv;/*       CUT_DEVICE_INIT(argc, argv);// display CUDA device info   int deviceCount;CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));for (int dev = 0; dev < deviceCount; ++dev) { cudaDeviceProp deviceProp; CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev)); printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); printf("  Major revision number:                         %d\n", deviceProp.major); printf("  Minor revision number:                         %d\n", deviceProp.minor); printf("  Total amount of global memory:                 %d bytes\n", deviceProp.totalGlobalMem); printf("  Clock rate:                                    %d kilohertz\n", deviceProp.clockRate); }*/         size_t arraySize = sizeof(cufftComplex) * (*nx) * (*ny);	 int dataSize = sizeof(float) * 2. * (*nx) * (*ny);	 CUDA_SAFE_CALL(cudaMallocHost((void**) &data, dataSize));         CUDA_SAFE_CALL(cudaMallocHost((void**) &in, arraySize));	 CUDA_SAFE_CALL(cudaMallocHost((void**) &out, arraySize));//malloc arrays on device and populate input         CUDA_SAFE_CALL(cudaMalloc((void**)&devdata, arraySize));	ne_l = (*nx) * (*ny);       for(i=0; i < ne_l; i++)        {  		in[i].x = data[2*i];  		in[i].y = data[2*i+1];	}         CUDA_SAFE_CALL(cudaMemcpy(devdata, in, arraySize, cudaMemcpyHostToDevice));/*   start plan creation, then compute the forward transform */	if (*isign == -1)// start plan creation	{         CUDA_SAFE_CALL(cufftPlan2d(&plan, *nx, *ny, CUFFT_C2C));         CUDA_SAFE_CALL(cufftExecC2C(plan, devdata, devdata, CUFFT_FORWARD));         CUDA_SAFE_CALL(cudaMemcpy(out, devdata, arraySize, cudaMemcpyDeviceToHost));	};	if (*isign == 1)	{         CUDA_SAFE_CALL(cufftPlan2d(&plan, *nx, *ny, CUFFT_C2C));         CUDA_SAFE_CALL(cufftExecC2C(plan, devdata, devdata, CUFFT_INVERSE));         CUDA_SAFE_CALL(cudaMemcpy(out, devdata, arraySize, cudaMemcpyDeviceToHost));	};/*   Scaling and send data back */     for(i=0; i < ne_l; i++)       {		 data[2*i]   = out[i].x;		 data[2*i+1] = out[i].y;       }         CUDA_SAFE_CALL(cufftDestroy(plan));  	 CUDA_SAFE_CALL(cudaFreeHost(data));         CUDA_SAFE_CALL(cudaFreeHost(in));         CUDA_SAFE_CALL(cudaFreeHost(out));         CUDA_SAFE_CALL(cudaFree(devdata));     }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?