📄 gpufftw.cpp
字号:
for(i=0;i<nsize;i++){
reals[i] = real[2*i];
imags[i] = real[2*i+1];
}
result=cgpufft1d(reals,imags, size/2, sign,1);
for(i=0;i<nsize;i++){
real[2*i] = reals[i];
real[2*i+1] = imags[i];
}
delete reals;
delete imags;
} //case isign=-1.
return result;
}
FFT_ERROR GPUFFTW::manycgpufft1d(float *real, float *imag, int arraywidth, int arrayheight, int sign, int isreal){
if(arraywidth < MIN_SIZE){
return (FFT_ARRAY_TOO_SMALL);
}
//amount of data to be ffted on the GPU
int gpuWidth=0;
//Find largest power of 2 smaller than size
int logWidth=0;
for(;arraywidth>>logWidth;logWidth++);
logWidth--;
gpuWidth=1<<logWidth;
int cpuSize=arraywidth-gpuWidth;
if(cpuSize==0){
FFT_ERROR err = setFFTParams(arraywidth*arrayheight);
logWidth-=2;
LOGN=logWidth;
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, imag);
uploadData(real);
glFinish();
cgpufft1d(sign);
glFinish();
readbackData(imag);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
if(target==1)
glReadBuffer( GL_COLOR_ATTACHMENT1_EXT);
else
glReadBuffer( GL_COLOR_ATTACHMENT0_EXT);
glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, real);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
if(sign==-1){
if(isreal==1){
int tmp = arraywidth*2;
for(int d=0;d<arrayheight*arraywidth;d++){
real[d]/=tmp;
imag[d]/=tmp;
}
}
else {
for(int d=0;d<arrayheight*arraywidth;d++){
real[d]/=arraywidth;
imag[d]/=arraywidth;
}
}
}
}
else return(FFT_NON_POWER_OF_TWO);
return(FFT_SUCCESS);
}
FFT_ERROR GPUFFTW::manyscgpufft1d(float *real, int arraywidth, int arrayheight, int sign){
int nsize=arraywidth/2,ind;
float *reals = new float[nsize*arrayheight];
float *imags = new float[nsize*arrayheight];
FFT_ERROR result;
unsigned long i,i1,i2,i3,i4,j,np3;
float c1=0.5,c2,h1r,h1i,h2r,h2i;
double wr,wi,wpr,wpi,wtemp,theta; //Double precision for the trigonometric recurrences.
theta=3.14159265358979323846/(double) (arraywidth>>1); //Initialize the recurrence.
//convert the 1D real FFT into a complex FFT of half the size -- this can be speeded up using a fragment program and is slow on CPUs
if (sign == 1) {
c2 = -0.5;
for(i=0;i<nsize;i++){
for(int j=0;j<arrayheight;j++){
ind=arrayheight*i;
reals[ind+j] = real[2*ind+j];
imags[ind+j] = real[2*ind+arrayheight+j];
}
}
result=manycgpufft1d(reals,imags, nsize, arrayheight, sign, 1);
}
else {
c2=0.5; //Otherwise set up for an inverse trans
theta= -theta; //form.
for(i=0;i<nsize;i++){
for(int j=0;j<arrayheight;j++){
ind=arrayheight*i;
reals[ind+j] = real[2*ind+j];
imags[ind+j] = real[2*ind+arrayheight+j];
}
}
result=manycgpufft1d(reals,imags, nsize, arrayheight, sign, 1);
}
for(j=0;j<arrayheight;j++){ // rearrange for each ft
wtemp=sin(0.5*theta);
wpr = -2.0*wtemp*wtemp;
wpi=sin(theta);
wr=1.0+wpr;
wi=wpi;
np3=arraywidth+1; // 0 index
real[0]=reals[0];
real[1]=imags[0];
for(i=1;i<nsize;i++){
real[2*i] = reals[nsize-i];
real[2*i+1] = imags[nsize-i];
}
for (i=2;i<=(arraywidth>>2);i++) { //Case i=1 done separately below.
i4=1+(i3=np3-(i2=1+(i1=i+i-2))); // 0 index
h1r= c1*(real[i1]+real[i3]); //The two separate transforms are sep
h1i= c1*(real[i2]-real[i4]); //arated out of real.
h2r = -c2*(real[i2]+real[i4]);
h2i=c2*(real[i1]-real[i3]);
real[i1]=h1r+wr*h2r-wi*h2i; //Here they are recombined to form the true transform of the original real real.
real[i2]=h1i+wr*h2i+wi*h2r;
real[i3]=h1r-wr*h2r+wi*h2i;
real[i4] = -h1i+wr*h2i+wi*h2r;
wr=(wtemp=wr)*wpr-wi*wpi+wr; //The recurrence.
wi=wi*wpr+wtemp*wpi+wi;
}
real[0] = (h1r=real[0])+real[1]; //Squeeze the first and last real together to get them all within the original array.
real[1] = h1r-real[1];
real=real+arraywidth;
reals=reals+(nsize);
imags=imags+(nsize);
}
real=real-(arrayheight*arraywidth);
reals=reals-(arrayheight*nsize);
imags=imags-(arrayheight*nsize);
delete reals;
delete imags;
return result;
}
FFT_ERROR GPUFFTW::cGPUFFTW2d(float *real, float *imag, int arraywidth, int arrayheight, int sign, int isreal){
return FFT_ARRAY_TOO_SMALL;//2d FFT not incorporated in the library yet -- write the transpose function and use the 1-D FFT code
if(arraywidth < MIN_SIZE || arrayheight<MIN_SIZE){
return (FFT_ARRAY_TOO_SMALL);
}
//amount of data to be ffted on the GPU
int gpuWidth=0;
//Find largest power of 2 smaller than size
int logWidth=0;
for(;arraywidth>>logWidth;logWidth++);
logWidth--;
gpuWidth=1<<logWidth;
int cpuSize=arraywidth-gpuWidth;
if(cpuSize==0){
//FFT_ERROR err = setFFTParams(gpuSize);
Width= arraywidth/4;
Height = arrayheight;
logWidth-=2;
LOGN=logWidth;
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, imag);
uploadData(real);
Stopwatch bitonic_timer;
glFinish();
bitonic_timer.Start();
//for(int dummy=0;dummy<100;dummy++)
if(isreal){
//rearrange
}
cgpufft1d(sign,1);
if(isreal){
//rearrange
}
//transpose
Width=arrayheight/4;
Height = arraywidth;
int logHt=0;
for(;arrayheight>>logHt;logHt++);
logHt--;
LOGN=logHt-2;
cgpufft1d(sign,1);
//transpose again
Width= arraywidth/4;
Height = arrayheight;
glFinish();
bitonic_timer.Stop();
printf("time:%4.5f \n", bitonic_timer.GetTime());
readbackData(imag);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
if(target==1)
glReadBuffer( GL_COLOR_ATTACHMENT1_EXT);
else
glReadBuffer( GL_COLOR_ATTACHMENT0_EXT);
glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, real);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
if(sign==-1){
for(int d=0;d<Width;d++)
for(int k=0;k<Height;k++){
real[d]/=(arraywidth*arrayheight);
imag[d]/=(arraywidth*arrayheight);;
}
}
}
else return(FFT_NON_POWER_OF_TWO);
return(FFT_SUCCESS);
}
/** Send data to the GPU from the CPU */
void GPUFFTW::uploadData(void *array){
//Upload into data into texture
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);
glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, array);
}
/** Readback the data from the GPU to CPU */
void GPUFFTW::readbackData(void *data){
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
if(target==1)
glReadBuffer( GL_COLOR_ATTACHMENT3_EXT);
else
glReadBuffer( GL_COLOR_ATTACHMENT2_EXT);
glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, data);
glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
}
// Swaps the texture to "render from" and the texture to "render to"
inline void GPUFFTW::PingPong(){
int temp=source;
source = target;
target = temp;
if(target==0){
//glDrawBuffer( GL_COLOR_ATTACHMENT0_EXT);
glDrawBuffers(2, buffers1);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
}
else{
//glDrawBuffer( GL_COLOR_ATTACHMENT1_EXT);
glDrawBuffers(2, buffers2);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[0]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[2]);
}
}
inline void GPUFFTW::PingPong2to4(){
int temp=source;
source = target;
target = temp;
if(target==0){
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT0_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[0], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT1_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[4], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT2_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[2], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT3_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[5], 0);
glDrawBuffers(4, buffers41);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
}
else{
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT0_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[4], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT1_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[1], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT2_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[5], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT3_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[3], 0);
glDrawBuffers(4, buffers42);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[0]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[2]);
}
}
inline void GPUFFTW::PingPong4to2(){
if(target==0){
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT1_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[1], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT3_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[3], 0);
glDrawBuffers(2, buffers1);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[4]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[5]);
}
else{
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT0_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[0], 0);
glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
GL_COLOR_ATTACHMENT2_EXT,
GL_TEXTURE_RECTANGLE_NV, textureid[2], 0);
glDrawBuffers(2, buffers2);
glActiveTextureARB( GL_TEXTURE2_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[4]);
glActiveTextureARB( GL_TEXTURE3_ARB );
glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[5]);
}
}
#define BLOCK_SIZE 64
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -