⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft2d_gpu.cpp

📁 using NVIDIA FFT CUDA Library to solve FFT problem
💻 CPP
字号:
/*  icc -c fft2d_gpu.cpp -I/usr/local/cuda/include -I/opt/NVIDIA_CUDA_SDK/common/inc -L/usr/local/cuda/lib -lcuda -lcufft -L/opt/NVIDIA_CUDA_SDK/lib -lcutil -lm*/     #include <stdlib.h>     #include <stdio.h>     #include <math.h>     #include <cuda_runtime.h>     #include <cufft.h>     #include <cutil.h>     #include <math_constants.h>extern "C"{extern void fft2d_gpu_( int *nx, int *ny, int *isign, float data[], int *istep){      cufftHandle plan;     cufftComplex *in, *devdata, *out;     int i,j,k,ne_l;     int argc;     char** argv;/*       CUT_DEVICE_INIT(argc, argv);// display CUDA device info   int deviceCount;CUDA_SAFE_CALL(cudaGetDeviceCount(&deviceCount));for (int dev = 0; dev < deviceCount; ++dev) { cudaDeviceProp deviceProp; CUDA_SAFE_CALL(cudaGetDeviceProperties(&deviceProp, dev)); printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name); printf("  Major revision number:                         %d\n", deviceProp.major); printf("  Minor revision number:                         %d\n", deviceProp.minor); printf("  Total amount of global memory:                 %d bytes\n", deviceProp.totalGlobalMem); printf("  Clock rate:                                    %d kilohertz\n", deviceProp.clockRate); }*/         size_t arraySize = sizeof(cufftComplex) * (*nx) * (*ny);	 int dataSize = sizeof(float) * 2. * (*nx) * (*ny);	 CUDA_SAFE_CALL(cudaMallocHost((void**) &data, dataSize));         CUDA_SAFE_CALL(cudaMallocHost((void**) &in, arraySize));	 CUDA_SAFE_CALL(cudaMallocHost((void**) &out, arraySize));//malloc arrays on device and populate input         CUDA_SAFE_CALL(cudaMalloc((void**)&devdata, arraySize));	ne_l = (*nx) * (*ny);       for(i=0; i < ne_l; i++)        {  		in[i].x = data[2*i];  		in[i].y = data[2*i+1];	}         CUDA_SAFE_CALL(cudaMemcpy(devdata, in, arraySize, cudaMemcpyHostToDevice));/*   start plan creation, then compute the forward transform */	if (*isign == -1)// start plan creation	{         CUDA_SAFE_CALL(cufftPlan2d(&plan, *nx, *ny, CUFFT_C2C));         CUDA_SAFE_CALL(cufftExecC2C(plan, devdata, devdata, CUFFT_FORWARD));         CUDA_SAFE_CALL(cudaMemcpy(out, devdata, arraySize, cudaMemcpyDeviceToHost));	};	if (*isign == 1)	{         CUDA_SAFE_CALL(cufftPlan2d(&plan, *nx, *ny, CUFFT_C2C));         CUDA_SAFE_CALL(cufftExecC2C(plan, devdata, devdata, CUFFT_INVERSE));         CUDA_SAFE_CALL(cudaMemcpy(out, devdata, arraySize, cudaMemcpyDeviceToHost));	};/*   Scaling and send data back */     for(i=0; i < ne_l; i++)       {		 data[2*i]   = out[i].x;		 data[2*i+1] = out[i].y;       }         CUDA_SAFE_CALL(cufftDestroy(plan));  	 CUDA_SAFE_CALL(cudaFreeHost(data));         CUDA_SAFE_CALL(cudaFreeHost(in));         CUDA_SAFE_CALL(cudaFreeHost(out));         CUDA_SAFE_CALL(cudaFree(devdata));     }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -