📄 qfft.br
字号:
/*
 * qfft.br - stream FFT kernels and host driver (Brook source).
 *
 * Complex samples are stored two to a float4: .xy is one (re,im) pair and
 * .zw is the next one.  The host code below drives the kernels pass by pass
 * and applies a bit-reversal permutation using a table built on the CPU.
 */
#include <stdio.h>
#include <stdlib.h>   /* calloc, malloc, free */
#include "main.h"
#include "wfft.h"

extern int debug_fft;
int split_bit_reverse=0;
int do_untangle=0;
extern float2 * data;
/*
static void __printf_cpu_inner (float a, float b, float c, float d) {
  printf ("%f %f == %f %f\n",a,b,c,d);
}
static void __print_cpu_inner (float a, float b, float c, float d) {
  printf ("%f %f -> %f %f\n",a,b,c,d);
}
*/

/* Complex product of one (re,im) pair a with one (re,im) pair b. */
kernel float2 mult_complex1(float2 a <>, float2 b <>)
{
  float2 negpos = {-1,1};
  return a.xx*b.xy+negpos*a.yy*b.yx;
  // c.x = a.x*b.x - a.y*b.y;
  // c.y = a.x*b.y + a.y*b.x;
}

/* Complex product of the two (re,im) pairs packed in a (.xy and .zw),
 * each multiplied by the same pair b. */
kernel float4 mult_complex2(float4 a <>, float2 b <>)
{
  float4 negpos={-1,1,-1,1};
  return a.xxzz*b.xyxy +negpos*a.yyww*b.yxyx;
  // c.xz = a.xz*b.xx - a.yw*b.yy;
  // c.yw = a.xz*b.yy + a.yw*b.xx;
}

extern int DOTIMES;

/* Butterfly on one pair: returns (r + t*W) in .xy and (r - t*W) in .zw. */
kernel float4 DoDFTHorizontalInner (float2 r<>, float2 t<>, float2 W <>) {
  float4 ret;
  float2 tw=mult_complex1(t,W);
  ret.xy = r+tw;
  ret.zw = r-tw;
  return ret;
}

/*
 * One pass along x.  Gathers s at indexR and indexI, picks the .zw pair of
 * each when s_index.x is odd (otherwise the .xy pair), and writes the
 * butterfly of the two picked pairs with twiddle W to s_prime.
 */
kernel void
DFTX (float4 s[][],
      float2 W<>,
      out float4 s_prime<>,
      iter float2 indexR<>,
      iter float2 indexI<>,
      iter float2 s_index<>) {
  float2 temp= round(fmod(s_index.x,2))==1;
  float4 r,t;
  r = s[indexR];
  t = s[indexI];
  s_prime=DoDFTHorizontalInner (temp.xx?r.zw:r.xy,temp.xx?t.zw:t.xy,W);
}

/*
 * One pass along y.  Gathers s at indexR and indexI and writes r + t*W, or
 * r - t*W when round(fmod(s_index.y,2)) is 1; both packed pairs are
 * processed at once.
 */
kernel void
DFTY (float4 s[][],
      float2 W<>,
      out float4 s_prime<>,
      iter float2 indexR<>,
      iter float2 indexI<>,
      iter float2 s_index<>) {
  float4 r = s[indexR];
  float4 t = s[indexI];
  float4 tw= mult_complex2(t,W);
  s_prime=r+((round(fmod(s_index.y,2))!=1)?tw:-tw);
}

/* Permutes rows only: output (x,y) is read from s at (x, table[y].y). */
kernel void optBitReverseY (out float4 s_out<>,
                            float4 indicesXo2YXp1o2Xm2[],
                            float4 s[][]) {
  float4 indexY = indicesXo2YXp1o2Xm2[(indexof s_out).y];
  float2 outindex = {(indexof s_out).x,indexY.y};
  s_out = s[outindex];
}

/*
 * Permutes along x only.  The table entry for the output column supplies two
 * source columns (.x and .z) and a selector (.w) choosing which packed pair
 * (.zw when non-zero, else .xy) to take from each source texel.
 */
kernel void bitReverseXo2 (out float4 s_out<>,
                           float4 indicesXo2YXp1o2Xm2[],
                           float4 s[][]) {
  float4 indexX = indicesXo2YXp1o2Xm2[(indexof s_out).x];
  float2 outindex = {indexX.x,(indexof s_out).y};
  float4 s_temp = s[outindex];
  s_out.xy = indexX.ww?s_temp.zw:s_temp.xy;
  outindex.x = indexX.z;//get the index + 1 (adds half the stream)
  s_temp = s[outindex];
  s_out.zw = indexX.ww?s_temp.zw:s_temp.xy;
}

/* Same as bitReverseXo2, but the source row is also remapped through
 * table[y].y, so both axes are permuted in one kernel. */
kernel void bitReverseXo2Y (out float4 s_out<>,
                            float4 indicesXo2YXp1o2Xm2[],
                            float4 s[][]) {
  float4 indexX = indicesXo2YXp1o2Xm2[(indexof s_out).x];
  float4 indexY = indicesXo2YXp1o2Xm2[(indexof s_out).y];
  float2 outindex = {indexX.x,indexY.y};
  float4 s_temp = s[outindex];
  s_out.xy = indexX.ww?s_temp.zw:s_temp.xy;
  outindex.x = indexX.z;//get the index + 1 (adds half the stream)
  s_temp = s[outindex];
  s_out.zw = indexX.ww?s_temp.zw:s_temp.xy;
}

// Utilities and the BitReverse() procedure

// To compute 2**x
static int TwoPowerX(int nNumber) {
  // Achieve the multiplication by left-shifting
  return (1<<nNumber);
}

// Procedure to reverse the bits.
// Example:
// INPUTS: nNumberToReverse = 110; nNumberOfBits = 3;
// OUTPUT: nReversedNumber = 011
// CAUTION: Make sure that you pass at least the minimum number of bits to
// represent the number. For reversing 6 you need at least 3 bits; if only
// 2 bits are passed then we get junk results.
static int BitReverse(int nNumberToReverse, int nNumberOfBits) {
  int nBitIndex;
  int nReversedNumber = 0;
  for (nBitIndex = nNumberOfBits-1; nBitIndex >= 0; --nBitIndex) {
    // The bits above nBitIndex have already been subtracted away, so the
    // shifted value equals 1 exactly when bit nBitIndex is set.
    if ((1 == nNumberToReverse >> nBitIndex)) {
      nReversedNumber += TwoPowerX(nNumberOfBits-1-nBitIndex);
      nNumberToReverse -= TwoPowerX(nBitIndex);
    }
  }
  return(nReversedNumber);
}

/*
 * Builds the lookup table consumed by the bit-reversal kernels.
 * The table has max(N, M/2) entries, where N = 2**logN and M = 2**logM.
 * Entry i holds:
 *   .x = BitReverse(2*i,   logM) / 2
 *   .y = BitReverse(i,     logN)
 *   .z = BitReverse(2*i+1, logM) / 2
 *   .w = BitReverse(2*i,   logM) % 2
 * Returns a heap array the caller must free(), or NULL if allocation fails.
 */
float4 * bitReversedIndices (int logN, int logM) {
  int i,N = (1<< logN),M = (1<<logM);
  int maxNM = N>(M/2)?N:(M/2);
  float4 * s_array;
  s_array = (float4*)calloc(maxNM,sizeof(float4));
  if (!s_array) {
    return 0;
  }
  for (i=0;i<maxNM;++i) {
    int temp = BitReverse(i*2,logM);
    s_array[i].x=(float)(temp/2);
    s_array[i].y=(float)BitReverse(i,logN);
    s_array[i].z=(float)(BitReverse(i*2+1,logM)/2);
    s_array[i].w=(float)(temp%2);
  }
  return s_array;
}

/*
 * For each output element, reads H1 at the element's own index and HN at the
 * row NNo2.x - y of the same column, forms their half-sum and
 * half-difference, and writes the F combination, or G for rows with
 * y > NNo2.y.
 */
kernel void UntangleFrag(float4 input[][], out float4 output<>, float2 NNo2) {
  float4 H1 = input[indexof(output)];
  float2 outputindex = {(indexof output).x,NNo2.x-(indexof output).y};
  float4 HN = input[outputindex];
  float4 F,G;
  float4 HPLUS = .5*(H1+HN);
  float4 HMINUS = .5*(H1-HN);
  F.xz = HPLUS.xz;
  F.yw = HMINUS.yw;
  G.xz = -HMINUS.xz;
  G.yw = HPLUS.yw;
  if ((indexof output).y>NNo2.y)
    output = G;
  else
    output = F;
}

/*
 * Same F/G combination as UntangleFrag, but the two inputs are read at the
 * iterator position `where` and at `where` shifted by NNo2.y rows.
 */
kernel void DoubleFrag(float4 input[][],
                       out float4 output<>,
                       float2 NNo2,
                       iter float2 where<>) {
  float4 H1 = input[where];
  float2 outputindex = {where.x,where.y+NNo2.y};
  float4 HN = input[outputindex];
  float4 F,G;
  float4 HPLUS = .5*(H1+HN);
  float4 HMINUS = .5*(H1-HN);
  F.xz = HPLUS.xz;
  F.yw = HMINUS.yw;
  G.xz = -HMINUS.xz;
  G.yw = HPLUS.yw;
  if ((indexof output).y>NNo2.y)
    output = G;
  else
    output = F;
}

#define FFT_HORIZONTAL 0
#define FFT_VERTICAL 1
#define FFT_2D 2

/* int -> float conversion helper used in the iterator bounds below. */
float tof(int i) {return (float)i;}

void FftwTransform2d(float2 *data, unsigned long N, unsigned long M,
                     int isign, char cast);

/*
 * Runs the stream FFT passes DOTIMES times over `input`.
 *
 *   input   host array copied into the working stream with streamRead.
 *   output  if non-NULL, receives the contents of s_out via streamWrite.
 *   logN,N  number of y passes and the first stream dimension (N = 2**logN
 *           is expected by bitReversedIndices).
 *   logM,M  number of x passes; streams hold M/2 float4 texels along the
 *           second dimension.
 *   twod    not referenced in this function.
 *
 * If the bit-reversal table cannot be allocated, an error is printed to
 * stderr and the function returns without touching `output`.
 */
void computeFFT2d(float2 *input,
                  float2 *output,
                  int logN,
                  int logM,
                  int N,
                  int M,
                  int twod){
  float4 s<N,(M/2)>;
  float4 s_out<N,(M/2)>;
  float4 s_out_final<N,M>;
  float4 * rawindices=0;
  float4 indicesXo2YXp1o2Xm2<(M/2>N?M/2:N)>;
  // iter1/iter2 feed DFTX (first and second half of the x range);
  // iter3/iter4 feed DFTY (first and second half of the y range).
  iter float2 iter1 <N,(M/2)> =iter(float2(0,0),float2(tof(M/4),tof(N)));
  iter float2 iter2 <N,(M/2)> =iter(float2(tof(M/4),0),float2(tof(M/2),tof(N)));
  iter float2 iter3 <N,(M/2)> =iter(float2(0,0),float2(tof(M/2),tof(N/2)));
  iter float2 iter4 <N,(M/2)> =iter(float2(0,tof(N/2)),float2(tof(M/2),tof(N)));
  iter float2 s_iter <N,(M/2)> =iter(float2(0,0),float2(tof(M/2),tof(N)));
  float indicesY<N>;   // declared but not referenced below
  int nPass,nBits,nPassCounter,thereAndBack;

  printf ("Going with a %d by %d texture of float2s\n or rather a %d by %d texture of float4s\n",N,M,N,(M/2));
  rawindices=bitReversedIndices(logN,logM);
  if (!rawindices) {
    // Without the table there is nothing valid to upload to the index stream.
    fprintf (stderr,"computeFFT2d: could not allocate bit-reversal table\n");
    return;
  }
  streamRead(indicesXo2YXp1o2Xm2,rawindices);
  streamRead(s,input);
  if (debug_fft) {
    printf("FFT Input:\n");
    streamPrint(s);
  }
  for (thereAndBack=0;thereAndBack<DOTIMES;++thereAndBack) {
    if (1==N*M)
      streamSwap(s_out,s);
    else {
      // Passes along y; s and s_out are swapped after every pass so that s
      // always holds the latest result.
      nPass=nBits=logN;
      for(nPassCounter=0;nPassCounter<nPass;++nPassCounter) {
        DFTY(s,getW(nPassCounter,logN,0),s_out,iter3,iter4,s_iter);
        if (debug_fft==2) {
          printf ("\nY STAGE %d\n",nPassCounter);
          streamPrint(s_out);
        }
        streamSwap(s,s_out);
      }
      if (split_bit_reverse) {
        optBitReverseY(s_out,indicesXo2YXp1o2Xm2,s);
        if (do_untangle) {
          UntangleFrag(s_out,s,float2((float)N,(float)(N/2)));
        }else {
          streamSwap(s,s_out);
        }
      }
      // Passes along x, same ping-pong scheme.
      nPass=nBits=logM;
      for (nPassCounter=0;nPassCounter<nPass;++nPassCounter) {
        DFTX(s,getW(nPassCounter,logM,1),s_out,iter1,iter2,s_iter);
        if (debug_fft==2) {
          printf ("\nX STAGE %d\n",nPassCounter);
          streamPrint(s_out);
        }
        streamSwap(s,s_out);
      }
      if (split_bit_reverse) {
        bitReverseXo2(s_out,indicesXo2YXp1o2Xm2,s);
        if (do_untangle){
          DoubleFrag(s_out,s_out_final,float2((float)N,(float)(N/2)),iter1);
        }
      }
    }
    // Note: this runs for the 1==N*M case as well.
    if (!split_bit_reverse) {//now do it separately
      bitReverseXo2Y(s_out,indicesXo2YXp1o2Xm2,s);
    }
  }
  free(rawindices);
  if (output)
    streamWrite(s_out,output);
  if (debug_fft) {
    printf("\nStream Output\n");
    streamPrint(s_out);
  }
}

/*
 * Prints one timing line.  Expands to a printf that relies on M, N, logN,
 * logM and DOTIMES being visible at the point of use.
 * NOTE(review): the format ends in "\\n", which prints a backslash and an
 * 'n' rather than a newline - confirm this is what the log consumer expects.
 */
#define PrintResults(sstart, sstop, name) \
  printf("%9d %6d %6.2f (* %s *)\\n", \
         M, (int) (sstop - sstart), \
         (float)(DOTIMES*10.*(logN+logM)*M*N) / (float) (sstop - sstart), name);

/*
 * Times four calls of computeFFT2d on the global `data` for a square
 * 2**logN by 2**logN problem and prints the result with PrintResults.
 * `start`, `stop` and GetTime() are not declared in this file.
 */
void doOptFFT(int logN) {
  int logM = logN;
  int i,N = (1<<logN);
  int M = (1<<logM);
  float2 * output=0;
  // computeFFT2d skips the final streamWrite when output is NULL, so a
  // failed allocation here only means the result is not copied back.
  output = (float2*)malloc(sizeof(float2)*2*M*N);
  start = GetTime();
  for (i=0;i<4;++i) {
    computeFFT2d(data,output,logM,logN,M,N,FFT_2D);
  }
  stop = GetTime();
  {
    PrintResults(start,stop,"FFT");
  }
  //FftwTransform2d(data,N,M,1,1);
  free(output);   // was leaked on every call
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -