⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gpufftw.cpp

📁 基于GPU进行快速科学计算
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	
		for(i=0;i<nsize;i++){
			reals[i] = real[2*i];
			imags[i] = real[2*i+1];

		}
		result=cgpufft1d(reals,imags, size/2, sign,1);
		for(i=0;i<nsize;i++){
			real[2*i] = reals[i];
			real[2*i+1] = imags[i];

		}
		delete reals;
		delete imags;
		
	} //case isign=-1.

	return result;



}

FFT_ERROR GPUFFTW::manycgpufft1d(float *real, float  *imag, int arraywidth, int arrayheight, int sign, int isreal){
	

	if(arraywidth < MIN_SIZE){
		return (FFT_ARRAY_TOO_SMALL);
	}
	
	//amount of data to be ffted on the GPU
	int gpuWidth=0;

	//Find largest power of 2 smaller than size
	int logWidth=0;
	for(;arraywidth>>logWidth;logWidth++);
	logWidth--;
	
	gpuWidth=1<<logWidth;
	int cpuSize=arraywidth-gpuWidth;

	if(cpuSize==0){
		FFT_ERROR err = setFFTParams(arraywidth*arrayheight);
		logWidth-=2;
		LOGN=logWidth;
		
		glActiveTextureARB( GL_TEXTURE3_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);    
		glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, imag);
		
		
		uploadData(real);
		glFinish();

		cgpufft1d(sign);
		glFinish();

		readbackData(imag);	
		
		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
		if(target==1) 
			glReadBuffer( GL_COLOR_ATTACHMENT1_EXT);
		else
			glReadBuffer( GL_COLOR_ATTACHMENT0_EXT);

		glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, real);

		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

		if(sign==-1){
			if(isreal==1){
				int tmp = arraywidth*2;
				for(int d=0;d<arrayheight*arraywidth;d++){
					real[d]/=tmp;
					imag[d]/=tmp;
				}
			}
			else {
				for(int d=0;d<arrayheight*arraywidth;d++){
					real[d]/=arraywidth;
					imag[d]/=arraywidth;
				}
			}
		}



	}
	else return(FFT_NON_POWER_OF_TWO);

	return(FFT_SUCCESS);
}


FFT_ERROR GPUFFTW::manyscgpufft1d(float *real, int arraywidth, int arrayheight, int sign){
	int nsize=arraywidth/2,ind;
	float *reals = new float[nsize*arrayheight];
	float *imags = new float[nsize*arrayheight];
	FFT_ERROR result;


	unsigned long i,i1,i2,i3,i4,j,np3;
	float c1=0.5,c2,h1r,h1i,h2r,h2i;
	double wr,wi,wpr,wpi,wtemp,theta; //Double precision for the trigonometric recurrences.
	theta=3.14159265358979323846/(double) (arraywidth>>1); //Initialize the recurrence.
	
	//convert the 1D real FFT into a complex FFT of half the size -- this can be speeded up using a fragment program and is slow on CPUs
	if (sign == 1) {
		c2 = -0.5;
		for(i=0;i<nsize;i++){
			for(int j=0;j<arrayheight;j++){
				ind=arrayheight*i;
				reals[ind+j] = real[2*ind+j];
				imags[ind+j] = real[2*ind+arrayheight+j];
			}
		}
		result=manycgpufft1d(reals,imags, nsize, arrayheight, sign, 1);
	} 
	else {
		c2=0.5; //Otherwise set up for an inverse trans
		theta= -theta; //form.
		for(i=0;i<nsize;i++){
			for(int j=0;j<arrayheight;j++){
				ind=arrayheight*i;
				reals[ind+j] = real[2*ind+j];
				imags[ind+j] = real[2*ind+arrayheight+j];
			}
		}
		result=manycgpufft1d(reals,imags, nsize, arrayheight, sign, 1);
	}

	for(j=0;j<arrayheight;j++){ // rearrange for each ft
		wtemp=sin(0.5*theta);
		wpr = -2.0*wtemp*wtemp;
		wpi=sin(theta);
		wr=1.0+wpr;
		wi=wpi;
		np3=arraywidth+1; // 0 index
		real[0]=reals[0];
		real[1]=imags[0];
		for(i=1;i<nsize;i++){
			real[2*i] = reals[nsize-i];
			real[2*i+1] = imags[nsize-i];
		}
		for (i=2;i<=(arraywidth>>2);i++) { //Case i=1 done separately below.
			i4=1+(i3=np3-(i2=1+(i1=i+i-2))); // 0 index
			h1r= c1*(real[i1]+real[i3]); //The two separate transforms are sep
			h1i= c1*(real[i2]-real[i4]); //arated out of real.
			h2r = -c2*(real[i2]+real[i4]);
			h2i=c2*(real[i1]-real[i3]);
			real[i1]=h1r+wr*h2r-wi*h2i; //Here they are recombined to form the true transform of the original real real.
			real[i2]=h1i+wr*h2i+wi*h2r;
			real[i3]=h1r-wr*h2r+wi*h2i;
			real[i4] = -h1i+wr*h2i+wi*h2r;
			wr=(wtemp=wr)*wpr-wi*wpi+wr; //The recurrence.
			wi=wi*wpr+wtemp*wpi+wi;
		}
		real[0] = (h1r=real[0])+real[1]; //Squeeze the first and last real together to get them all within the original array.
		real[1] = h1r-real[1];
		real=real+arraywidth;
		reals=reals+(nsize);
		imags=imags+(nsize);
	}
	real=real-(arrayheight*arraywidth);
	reals=reals-(arrayheight*nsize);
	imags=imags-(arrayheight*nsize);

	delete reals;
	delete imags;

	return result;

}

FFT_ERROR GPUFFTW::cGPUFFTW2d(float *real, float  *imag, int arraywidth, int arrayheight, int sign, int isreal){

	return FFT_ARRAY_TOO_SMALL;//2d FFT not incorporated in the library yet -- write the transpose function and use the 1-D FFT code

	if(arraywidth < MIN_SIZE || arrayheight<MIN_SIZE){
		return (FFT_ARRAY_TOO_SMALL);
	}
	//amount of data to be ffted on the GPU
	int gpuWidth=0;

	//Find largest power of 2 smaller than size
	int logWidth=0;
	for(;arraywidth>>logWidth;logWidth++);
	logWidth--;
	
	gpuWidth=1<<logWidth;
	int cpuSize=arraywidth-gpuWidth;

	if(cpuSize==0){
		//FFT_ERROR err = setFFTParams(gpuSize);
		Width= arraywidth/4;
		Height = arrayheight;
		logWidth-=2;
		LOGN=logWidth;

		
		glActiveTextureARB( GL_TEXTURE3_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);    
		glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, imag);
		
		
		uploadData(real);
		Stopwatch bitonic_timer;
		glFinish();
		bitonic_timer.Start();
		//for(int dummy=0;dummy<100;dummy++)

		if(isreal){
			//rearrange
		}
		cgpufft1d(sign,1);
		if(isreal){
			//rearrange
		}

		//transpose
		Width=arrayheight/4;
		Height = arraywidth;
		int logHt=0;
		for(;arrayheight>>logHt;logHt++);
		logHt--;
		LOGN=logHt-2;
		cgpufft1d(sign,1);

		//transpose again
		Width= arraywidth/4;
		Height = arrayheight;
		
		
	






		glFinish();
		bitonic_timer.Stop();
		printf("time:%4.5f \n", bitonic_timer.GetTime());



		readbackData(imag);	
		
		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
		if(target==1) 
			glReadBuffer( GL_COLOR_ATTACHMENT1_EXT);
		else
			glReadBuffer( GL_COLOR_ATTACHMENT0_EXT);

		glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, real);

		glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);

		if(sign==-1){
			for(int d=0;d<Width;d++)
				for(int k=0;k<Height;k++){
					real[d]/=(arraywidth*arrayheight);
					imag[d]/=(arraywidth*arrayheight);;
				}
		}

	}
	else return(FFT_NON_POWER_OF_TWO);

	return(FFT_SUCCESS);
}




/** Send data to the GPU from the CPU */
void GPUFFTW::uploadData(void *array){
	//Upload into data into texture	
	glActiveTextureARB( GL_TEXTURE2_ARB );
	glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);    
	glTexSubImage2D(GL_TEXTURE_RECTANGLE_NV, 0, 0, 0, Width, Height, GL_RGBA, GL_FLOAT, array);	
	
	
}
/** Readback the data from the GPU to CPU */
void GPUFFTW::readbackData(void *data){
	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, fb);
	if(target==1) 
		glReadBuffer( GL_COLOR_ATTACHMENT3_EXT);
	else
		glReadBuffer( GL_COLOR_ATTACHMENT2_EXT);

	glReadPixels(0, 0, Width, Height, GL_RGBA, GL_FLOAT, data);

	glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);		
}

// Swaps the texture to "render from" and the texture to "render to"
inline void GPUFFTW::PingPong(){
	int temp=source;
	source = target;
	target = temp;

	if(target==0){
		//glDrawBuffer( GL_COLOR_ATTACHMENT0_EXT);
		glDrawBuffers(2, buffers1);
		glActiveTextureARB( GL_TEXTURE2_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);
		glActiveTextureARB( GL_TEXTURE3_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
	
	}
	else{
		//glDrawBuffer( GL_COLOR_ATTACHMENT1_EXT);
		glDrawBuffers(2, buffers2);
		glActiveTextureARB( GL_TEXTURE2_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[0]);
		glActiveTextureARB( GL_TEXTURE3_ARB );
		glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[2]);
	
	}
}



inline void GPUFFTW::PingPong2to4(){
	int temp=source;
		source = target;
		target = temp;
		if(target==0){
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT0_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[0], 0);


			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT1_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[4], 0);


			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT2_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[2], 0);


			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT3_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[5], 0);

			glDrawBuffers(4, buffers41);
			glActiveTextureARB( GL_TEXTURE2_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[1]);
			glActiveTextureARB( GL_TEXTURE3_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[3]);
		
		}
		else{
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT0_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[4], 0);

			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT1_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[1], 0);

			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT2_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[5], 0);

			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT3_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[3], 0);


			glDrawBuffers(4, buffers42);
			glActiveTextureARB( GL_TEXTURE2_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[0]);
			glActiveTextureARB( GL_TEXTURE3_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[2]);
		
		}
}

inline void GPUFFTW::PingPong4to2(){
	if(target==0){
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT1_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[1], 0);
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT3_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[3], 0);


			
			glDrawBuffers(2, buffers1);
			glActiveTextureARB( GL_TEXTURE2_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[4]);
			glActiveTextureARB( GL_TEXTURE3_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[5]);
		
		}
		else{
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT0_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[0], 0);

	
			glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT,
                                  GL_COLOR_ATTACHMENT2_EXT,
                                  GL_TEXTURE_RECTANGLE_NV, textureid[2], 0);

			glDrawBuffers(2, buffers2);
			glActiveTextureARB( GL_TEXTURE2_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[4]);
			glActiveTextureARB( GL_TEXTURE3_ARB );
			glBindTexture(GL_TEXTURE_RECTANGLE_NV, textureid[5]);
		
		}

}



#define BLOCK_SIZE 64

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -