📄 hhcfft4.asm
字号:
lcntr=r0, do mstage until lce; /* do STAGES-2 stages */
i7=dm(_Rad4FFT_cosine); /* first real twiddle */
i15=dm(_Rad4FFT_sine); /* first imag twiddle */
r8=dm(_Rad4FFT_redata);
r9=dm(_Rad4FFT_imdata);
i0=r8; /* upper real path */
r10=r8+r2; i8=r9; /* upper imaginary path */
i1=r10; /* second real input path */
r10=r10+r2, i4=r10; /* second real output path */
i2=r10; /* third real input path */
r10=r10+r2, i5=r10; /* third real output path */
i3=r10; /* fourth real input path */
r10=r9+r2, i6=r10; /* fourth real output path */
i9=r10; /* second imag input path */
r10=r10+r2, i12=r10; /* second imag output path */
i10=r10; /* third imag input path */
r10=r10+r2, i13=r10; /* third imag output path */
i11=r10; /* fourth imag input path */
i14=r10; /* fourth imag output path */
m4=r3;
m12=r3;
r4=r3+1, m6=r2;
m3=r4;
r2=r2-1, m11=r4;
m7=r2;
lcntr=m5, do mgroup until lce; /* do m5 groups */
f0=dm(i7,m0), f5=pm(i9,m8);
f8=f0*f5, f4=dm(i1,m0), f1=pm(i15,m8);
f9=f0*f4;
f12=f1*f5, f0=dm(i7,m0), f5=pm(i11,m8);
f13=f1*f4, f12=f9+f12, f4=dm(i3,m0), f1=pm(i15,m8);
f8=f0*f4, f2=f8-f13;
f13=f1*f5;
f9=f0*f5, f8=f8+f13, f0=dm(i7,m1), f5=pm(i10,m8);
f13=f1*f4, f12=f8+f12, f14=f8-f12, f4=dm(i2,m0), f1=pm(i15,m9);
f11=f0*f4;
f13=f1*f5, f6=f9-f13;
f9=f0*f5, f13=f11+f13, f11=dm(i0,0);
f13=f1*f4, f8=f11+f13, f10=f11-f13;
/*___________Do m7 radix-4 butterflies___________*/
lcntr=m7, do mr4bfly until lce;
f13=f9-f13, f4=dm(i1,m0), f5=pm(i9,m8);
f2=f2+f6, f15=f2-f6, f0=dm(i7,m0), f1=pm(i15,m8);
f8=f0*f4, f3=f8+f12, f7=f8-f12, f9=pm(i8,0);
f12=f1*f5, f9=f9+f13, f11=f9-f13, f13=f2;
f8=f0*f5, f12=f8+f12, f0=dm(i7,m0), f5=pm(i11,m8);
f13=f1*f4, f9=f9+f13, f6=f9-f13, f4=dm(i3,m0), f1=pm(i15,m8);
f8=f0*f4, f2=f8-f13, dm(i0,m0)=f3, pm(i8,m8)=f9;
f13=f1*f5, f11=f11+f14, f7=f11-f14, dm(i4,m0)=f7, pm(i12,m8)=f6;
f9=f0*f5, f8=f8+f13, f0=dm(i7,m1), f5=pm(i10,m8);
f13=f1*f4, f12=f8+f12, f14=f8-f12, f4=dm(i2,m0), f1=pm(i15,m9);
f11=f0*f4, f3=f10+f15, f8=f10-f15, pm(i13,m8)=f11;
f13=f1*f5, f6=f9-f13, dm(i6,m0)=f8, pm(i14,m8)=f7;
f9=f0*f5, f13=f11+f13, f11=dm(i0,0);
mr4bfly:
f13=f1*f4, f8=f11+f13, f10=f11-f13, dm(i5,m0)=f3;
/*___________End radix-4 butterfly_____________*/
/* dummy for address update * * */
f13=f9-f13, f0=dm(i7,m2), f1=pm(i15,m10);
f2=f2+f6, f15=f2-f6, f0=dm(i1,m4), f1=pm(i9,m12);
f3=f8+f12, f7=f8-f12, f9=pm(i8,0);
f9=f9+f13, f11=f9-f13, f0=dm(i2,m4);
f9=f9+f2, f6=f9-f2, f0=dm(i3,m4), f1=pm(i10,m12);
dm(i0,m3)=f3, pm(i8,m11)=f9;
f11=f11+f14, f7=f11-f14, dm(i4,m3)=f7, pm(i12,m11)=f6;
f3=f10+f15, f8=f10-f15, pm(i13,m11)=f11;
dm(i6,m3)=f8, pm(i14,m11)=f7;
mgroup: dm(i5,m3)=f3, f1=pm(i11,m12);
r3=m4;
r1=m5;
r2=m6;
r3=ashift r3 by -2; /* groupstep/4 */
r1=ashift r1 by 2; /* groups*4 */
m5=r1;
mstage: r2=ashift r2 by -2; /* butterflies/4 */
/*____________________Last radix-4 stage__________________________________*/
/* Includes bitreversal of the real data in dm */
bit set mode1 BR0 | BR8; /* bitreversal in i0 */
/* with: m0=m8=1 preset */
r0=dm(_Rad4FFT_redata);/* input */
r0=r0+1,i4=r0;
r0=r0+1,i1=r0;
r0=r0+1,i2=r0;
i3=r0;
i0=dm(_Rad4FFT_ORE); /* real output array base must be an integer multiple of N */
m2=dm(_Rad4FFT_OST);
i7=dm(_Rad4FFT_cosine);
i8=dm(_Rad4FFT_OIM); /* input */
r0=dm(_Rad4FFT_imdata);
r0=r0+1,i12=r0;
r0=r0+1,i9=r0;
r0=r0+1,i10=r0;
i11=r0;
i15=dm(_Rad4FFT_sine); /* first imag twiddle */
m1=4;
m9=m1;
m12=dm(_Rad4FFT_OST);
f0=dm(i7,m0), f5=pm(i9,m9);
f8=f0*f5, f4=dm(i1,m1), f1=pm(i15,m8);
f9=f0*f4;
f12=f1*f5, f0=dm(i7,m0), f5=pm(i11,m9);
f13=f1*f4, f12=f9+f12, f4=dm(i3,m1), f1=pm(i15,m8);
f8=f0*f4, f2=f8-f13;
f13=f1*f5;
f9=f0*f5, f8=f8+f13, f0=dm(i7,m0), f5=pm(i10,m9);
f13=f1*f4, f12=f8+f12, f14=f8-f12, f4=dm(i2,m1), f1=pm(i15,m8);
f11=f0*f4;
f13=f1*f5, f6=f9-f13;
f9=f0*f5, f13=f11+f13, f11=dm(i4,m1);
f13=f1*f4, f8=f11+f13, f10=f11-f13;
/*________Do N/4-1 radix-4 butterflies_______*/
r5=dm(_Rad4FFT_N4M1);
lcntr=r5, do lstage until lce;
f13=f9-f13, f4=dm(i1,m1), f5=pm(i9,m9);
f2=f2+f6, f15=f2-f6, f0=dm(i7,m0), f1=pm(i15,m8);
f8=f0*f4, f3=f8+f12, f7=f8-f12, f9=pm(i12,m9);
f12=f1*f5, f9=f9+f13, f11=f9-f13, f13=f2;
f8=f0*f5, f12=f8+f12, f0=dm(i7,m0), f5=pm(i11,m9);
f13=f1*f4, f9=f9+f13, f6=f9-f13, f4=dm(i3,m1), f1=pm(i15,m8);
f8=f0*f4, f2=f8-f13, dm(i0,m2)=f3, pm(i8,m12)=f9;
f13=f1*f5, f11=f11+f14, f7=f11-f14, dm(i0,m2)=f7, pm(i8,m12)=f6;
f9=f0*f5, f8=f8+f13, f0=dm(i7,m0), f5=pm(i10,m9);
f13=f1*f4, f12=f8+f12, f14=f8-f12, f4=dm(i2,m1), f1=pm(i15,m8);
f11=f0*f4, f3=f10+f15, f8=f10-f15, pm(i8,m12)=f11;
f13=f1*f5, f6=f9-f13, dm(i0,m2)=f3, pm(i8,m12)=f7;
f9=f0*f5, f13=f11+f13, f11=dm(i4,m1);
lstage:
f13=f1*f4, f8=f11+f13, f10=f11-f13, dm(i0,m2)=f8;
f13=f9-f13;
f2=f2+f6, f15=f2-f6;
f3=f8+f12, f7=f8-f12, f9=pm(i12,m9);
f9=f9+f13, f11=f9-f13, dm(i0,m2)=f3;
f9=f9+f2, f6=f9-f2, dm(i0,m2)=f7;
pm(i8,m12)=f9;
f11=f11+f14, f7=f11-f14, pm(i8,m12)=f6;
f3=f10+f15, f8=f10-f15, pm(i8,m12)=f11;
dm(i0,m2)=f3, pm(i8,m12)=f7;
bit clr mode1 BR0 | BR8 | RND32 | CBUFEN | PEYEN; /* no bitreversal in i0, disable 32bit floating point, circular buffering and SIMD */
dm(i0,m2)=f8;
mode1=dm(SaveStack);
nop;
exit;
_Rad4FFT.end:
/*******************************************************************************
*
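* Function: _Rad4FFT_Init - Initialization for the fast complex floating point FFT
* Prototype:
* void _Rad4FFT_Init(
* TCFFT4_simd_size N, // size of fft (power of 4)
* float *reinput, // real input array
* float *iminput, // imaginary input array
* float *reoutput, // real output array
* float *imoutput, // imaginary output array
* float *cosine, // real (cosine) twiddle factors
* float *sine); // imaginary (sine) twiddle factors
* (The last two parameters are read from the stack by the code below;
* their names and types are assumed here - confirm against Rad4FFT.h.)
*
* Synopsis: #include "Rad4FFT.h"
*
* Description:
* This routine initializes the data elements for _Rad4FFT.
* It must be called before Rad4FFT() is called.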
*
* Author:
* Darrel Judd
* Judd Labs, Inc.
* 801-756-2057
* drjudd@ieee.org
*
* Revisions:
* Created June, 2003 Darrel Judd
*******************************************************************************/
_Rad4FFT_Init:
// Stores the parameters and derived constants that _Rad4FFT reads back
// from memory.
//
// Inputs as used below:
//   r4        = N (FFT size)
//   r8        = pointer to real input data
//   r12       = pointer to imaginary input data
//   reads(1)  = real output address
//   reads(2)  = imaginary output address
//   reads(3)  = pointer to real (cosine) twiddle factors
//   reads(4)  = pointer to imaginary (sine) twiddle factors
//
// Only r0, r1, r2, i4 and i12 are written. Earlier revisions used i0, i14
// and r3, which the SHARC C run-time model treats as call-preserved, so
// the caller's values in those registers were destroyed.

// --- bit-reversed output base addresses -------------------------------
r0=reads(1); // get real output address from stack
i4=r0;
bitrev(i4,0); // bit-reverse the address (modifier of 0)
dm(_Rad4FFT_ORE)=i4; // store bit-reversed real output base
r0=reads(2); // get imaginary output address from stack
i12=r0;
bitrev(i12,0);
dm(_Rad4FFT_OIM)=i12; // store bit-reversed imaginary output base

// --- bit-reversed output step -----------------------------------------
r0=lshift r4 by -1; // r0 = N/2
i4=r0;
bitrev(i4,0);
r0=i4;
dm(_Rad4FFT_OST)=r0; // bitrev(N/2)

// --- size-derived constants -------------------------------------------
dm(_Rad4FFT_N)=r4; // store N
r2=lshift r4 by -2; // r2 = N/4
r2=r2-1; // r2 = N/4-1
dm(_Rad4FFT_N4M1)=r2; // store N/4-1
r2=lshift r4 by -3; // r2 = N/8
dm(_Rad4FFT_N8)=r2; // store N/8
r2=lshift r4 by -4; // r2 = N/16
dm(_Rad4FFT_N16)=r2; // store N/16
r0=3;
r0=r2*r0 (UUI); // r0 = N*3/16
dm(_Rad4FFT_N16T3)=r0; // store N*3/16

// --- number of radix-4 stages minus 2 ---------------------------------
f2=float r4; // calculate the number of stages
r0=logb f2; // get binary exponent (log2 N for a power of two)
r0=lshift r0 by -1; // convert to log4
r2=2;
r0=r0-r2; // stages-2
dm(_Rad4FFT_StagesM2)=r0;

// --- input data and twiddle table pointers ----------------------------
dm(_Rad4FFT_redata)=r8; // store pointer to real input data
dm(_Rad4FFT_imdata)=r12; // store pointer to imag input data
r0=reads(3); // r0 is free here: stages-2 has already been stored
dm(_Rad4FFT_cosine)=r0; // store pointer to real twiddle factors
r0=reads(4);
dm(_Rad4FFT_sine)=r0; // store pointer to imaginary twiddle factors

// --- quarter-point pointers -------------------------------------------
// Six consecutive words are written starting at _Rad4FFT_redataPN4:
// redata + N/4, + N/2, + 3N/4, then imdata + N/4, + N/2, + 3N/4.
// In each combined instruction the store presumably writes the value r1
// held before the add in that same instruction - the sequence only
// yields the offsets listed above under that reading.
i4=_Rad4FFT_redataPN4;
r0=dm(_Rad4FFT_N);
r0=lshift r0 by -2; // r0 = N/4
r1=dm(_Rad4FFT_redata);
r1=r0+r1; // redata + N/4
r1=r0+r1,dm(i4,1)=r1; // store redata + N/4, r1 -> redata + N/2
r1=r0+r1,dm(i4,1)=r1; // store redata + N/2, r1 -> redata + 3N/4
dm(i4,1)=r1; // store redata + 3N/4
r1=dm(_Rad4FFT_imdata);
r1=r0+r1; // imdata + N/4
r1=r0+r1,dm(i4,1)=r1; // store imdata + N/4, r1 -> imdata + N/2
r1=r0+r1,dm(i4,1)=r1; // store imdata + N/2, r1 -> imdata + 3N/4
dm(i4,1)=r1; // store imdata + 3N/4
exit;
_Rad4FFT_Init.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -