📄 fft.asm
字号:
r6 = r1 + 1, m3 = r1;
r6 = r6 + 1, r0 = m2;
r0 = lshift r0 by -2;
r2 = r0 - 1, m11 = r1;
r0 = r0 + r0, i5 = i4;
r2 = r2 - 1, m2 = r0;
m10 = r0;
i13 = i12;
f4=dm(i1,m2), f5=pm(i9,m10); /* real twiddle, imag twiddle */
f1=dm(i5,m4), f0=pm(i13,m12);
f10=f1*f4, m0 = r6;
f12=f0*f5, m8 = r6;
f11=f1*f5, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);
f10=f1*f4, f15=f11+f13, ustat1 = r3;
f12=f0*f5, f6=f8+f15, f2=f8-f15, ustat3 = r2;
lcntr = ustat1, do _mid_loop until lce;
lcntr = ustat3, do _in_loop until lce;
f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);
f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;
_in_loop: f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;
f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m0), f8=pm(i13,m8);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);
f4=dm(i1,m2), f5=pm(i9,m10); /* real twiddle, imag twiddle */
f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;
f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;
f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);
f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;
_mid_loop: f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m0)=f7, pm(i12,m8)=f6;
i1 = pm(CFFT_TWID_REAL,i14); // Pointer to cosine table
_out_loop: i9 = pm(CFFT_TWID_IMAG,i14); // Pointer to sine table
/******************** 3rd To Last Stage *********************/
_end_2:
i5 = pm(CFFT_REAL_FFTpN4,i14); // Pointer to real_fft+N/4
i13 = pm(CFFT_IMAG_FFTpN4,i14); // Pointer to real_fft+N/4
m2 = 4;
m10 = 4;
r2 = pm(CFFT_mN4,i14); // read -N/4
r3 = r2 + 1, m3 = r2;
r3 = r3 + 1, m11 = r2;
f4=dm(i1,m2), f5=pm(i9,m10); /* real twiddle, imag twiddle */
f1=dm(i5,m3), f0=pm(i13,m11);
f10=f1*f4, m0 = r3;
f12=f0*f5, m8 = r3;
f11=f1*f5, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
f10=f1*f4, f15=f11+f13;
f12=f0*f5, f6=f8+f15, f2=f8-f15;
lcntr = m14, do _mid_loop3 until lce; // loop N/8 times
f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m0), f8=pm(i13,m8);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
f4=dm(i1,m2), f5=pm(i9,m10);
f10=f1*f4, f15=f11+f13, dm(i4,m3)=f3, pm(i12,m11)=f2;
f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;
f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
f10=f1*f4, f15=f11+f13, dm(i4,m3)=f3, pm(i12,m11)=f2;
_mid_loop3: f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m0)=f7, pm(i12,m8)=f6;
/******************** Pre-Last Stage *********************/
_end_3:
r2 = pm(CFFT_N2p2,i14); // read N/2 + 2;
m2 = pm(CFFT_mN2,i14); // read -N/2;
m10 = m2;
i1 = b1;
i9 = b9;
f1=dm(i4,m2), f0=pm(i12,m10);
f4=dm(i1,m1), f5=pm(i9,m9); /* real twiddle, imag twiddle */
f10=f1*f4, m3 = r2;
f12=f0*f5, m11 = r2;
f11=f1*f5, f9=dm(i4,m3), f8=pm(i12,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m2), f0=pm(i13,m10);
f10=f1*f5, f15=f11+f13, i2=i5;
f12=f0*f4, f7=f9+f14, f3=f9-f14, i10=i13;
lcntr = m14, do _mid_loop4 until lce; // loop N/8 times
f11=f1*f4, f6=f8+f15, f2=f8-f15, f9=dm(i5,m3), f8=pm(i13,m11);
f13=f0*f5, f14=f10+f12, f1=dm(i4,m2), f0=pm(i12,m10);
f4=dm(i1,m1), f5=pm(i9,m9); /* real twiddle, imag twiddle */
f10=f1*f4, f15=f11-f13, dm(i3,m2)=f7, pm(i11,m10)=f6; /* cmult (f1,f0)*(f4,f5) */
f12=f0*f5, f7=f9+f14, f3=f9-f14, dm(i3,m3)=f3, pm(i11,m11)=f2;
f11=f1*f5, f2=f8+f15, f6=f8-f15, f9=dm(i4,m3), f8=pm(i12,m11);
f13=f0*f4, f14=f10-f12, f1=dm(i5,m2), f0=pm(i13,m10);
f10=f1*f5, f15=f11+f13, dm(i2,m2)=f7, pm(i10,m10)=f6; /* cmult (f1,f0)*(f4,f5) */
_mid_loop4: f12=f0*f4, f7=f9+f14, f3=f9-f14, dm(i2,m3)=f3, pm(i10,m11)=f2;
/******************** Last Stage ***********************/
_end_4:
bit clr MODE1 BDCST1 | BDCST9;
b2 = pm(CFFT_REAL_INPUT,i14); // read pointer to real_input
b4 = pm(CFFT_REAL_INPUTpN2,i14); // read pointer to real_input + N/2
b10 = pm(CFFT_IMAG_INPUT,i14); // read pointer to imag_input
b12 = pm(CFFT_IMAG_INPUTpN2,i14); // read pointer to imag_input + N/2
r1 = -1;
f15 = 0.0;
r0 = pm(CFFT_REAL_FFT,i14); // read pointer to real_fft
r0 = lshift r0 by r1, i1 = b1;
r0 = r0 + 1, b3 = r0;
b11 = r0;
r0 = pm(CFFT_IMAG_FFT,i14); // read pointer to imag_fft
r0 = lshift r0 by r1, i9 = b9;
r0 = r0 + 1, b5 = r0;
b13 = r0;
f4=dm(i1,m1), f5=pm(i9,m9);
f0=dm(i3,m1), f8=pm(i13,m9);
f8=dm(i5,m1), f0=pm(i11,m9);
f10=f1*f5, f2=f9+f15;
f12=f2*f4;
f10=f1*f4, f13=f10+f12;
f12=f2*f5, f6=f8+f13, f7=f8-f13;
f14=f10-f12, f4=dm(i1,m1), f5=pm(i9,m9);
f11=f0+f14, f3=f0-f14, f8=dm(i5,m1), f0=pm(i11,m9);
r14=pm(CFFT_N4m1,i14);
lcntr = r14, do _last_stage until lce; // loop N/4-1 times
f0=dm(i3,m1), f8=pm(i13,m9);
f10=f1*f5, f2=f9+f15, dm(i4,m1)=f3, pm(i12,m9)=f7;
f12=f2*f4, dm(i2,m1)=f11, pm(i10,m9)=f6;
f10=f1*f4, f13=f10+f12;
f12=f2*f5, f6=f8+f13, f7=f8-f13;
f14=f10-f12, f4=dm(i1,m1), f5=pm(i9,m9);
_last_stage:f11=f0+f14, f3=f0-f14, f8=dm(i5,m1), f0=pm(i11,m9);
dm(i2,m1)=f11, pm(i10,m9)=f6;
mode1=dm(SaveStack+0); // restore mode1 register
dm(i4,m1)=f3, pm(i12,m9)=f7; // let mode1 settle before we use the stack
exit;
_cfft_simd.end:
/*******************************************************************************
*
* Function: _cfft_simd_init - Initialization for _cfft_simd
*
* Synopsis: #include "cfft_simd.h"
* void cfft_simd_init(Tcfft_simd pm *pfft, ...); (full prototype below)
*
*
// prototype
//==================================================================================
//void cfft_simd_init(Tcfft_simd pm *pfft, // pointer to fft object
// Tcfft_simd_size N, // size of fft, must be a power of 2
// float *reinput, // pointer to real input data of size N
// float *imimput, // pointer to imag input data of size N
// float *retwid, // pointer to real twiddle array of size N/2
// float *imtwid, // pointer to imag twiddle array of size N/2
// float *refft, // pointer to real temp array of size N
// float *imfft); // pointer to imag temp array of size N
//==================================================================================
* Description:
* This function is used to initialize or construct a cfft_simd object.
* It takes the parameters that are passed to it and stores them in a
* structure of type Tcfft_simd defined in the file cfft_simd.h.
* The function _cfft_simd is passed a pointer to this structure when it
* is called. Thus all information needed to compute an fft is stored
* in the structure.
*
* This function only needs to be called once during the prolog of the
* program that it is used in. After that, all that is needed to execute
* the FFT is the pointer to the structure mentioned above.
*
* If more than one FFT needs to be computed, a Tcfft_simd structure needs to
* be created for each FFT. Twiddle factors can be shared between FFTs if
* the FFTs are the same size, but the input and intermediate arrays (and the
* twiddle arrays of FFTs of different sizes) all need to be unique.
* Caution:
* The object of type Tcfft_simd must be stored in a PM memory block.
*
* Author: Darrel Judd
* Judd Labs, Inc.
* 801-756-2057
* drjudd@ieee.org
*
* Revisions:
* Created June, 2003 - Darrel Judd
************************************************************************************/
.extern _cosf_sinf_simd;
.global _cfft_simd_init;
_cfft_simd_init:
// Fills in every field of the FFT object whose address arrives in r4/f4.
// Inputs as used below: r8 = N, r12 = real input pointer, and the five
// remaining pointers are fetched with reads(1)..reads(5).
// Working registers: r2 = N/2 and r4 = N/4 stay live once computed;
// r0/r1 are temporaries; i4 is used for the bit-reversals.
    i12=f4;                             // i12 -> fft object

    // ---- constants derived from the transform size ----
    pm(CFFT_N,i12)=r8;                  // N
    r2=lshift r8 by -1;                 // r2 = N/2 (kept to the end)
    pm(CFFT_N2,i12)=r2;
    r0=-r2;
    pm(CFFT_mN2,i12)=r0;                // -N/2
    r0=2;
    r0=r2+r0;
    pm(CFFT_N2p2,i12)=r0;               // N/2 + 2
    r4=lshift r8 by -2;                 // r4 = N/4 (kept to the end)
    pm(CFFT_N4,i12)=r4;
    r0=-r4;
    pm(CFFT_mN4,i12)=r0;                // -N/4
    r0=r4-1;
    pm(CFFT_N4m1,i12)=r0;               // N/4 - 1
    r0=lshift r8 by -3;
    pm(CFFT_N8,i12)=r0;                 // N/8

    // ---- stage count: exponent of N taken from its float form ----
    f1=float r8;
    r1=logb f1;                         // number of stages
    r0=5;
    r1=r1-r0;
    pm(CFFT_STAGESm5,i12)=r1;           // STAGES - 5

    // ---- real input: pointer, pointer + N/2, bit-reversed pointer ----
    pm(CFFT_REAL_INPUT,i12)=r12;
    r0=r12+r2;
    pm(CFFT_REAL_INPUTpN2,i12)=r0;
    i4=r12;
    bitrev(i4,0);
    r0=i4;
    pm(CFFT_IREDM,i12)=r0;

    // ---- imag input: pointer, pointer + N/2, bit-reversed pointer ----
    r1=reads(1);
    pm(CFFT_IMAG_INPUT,i12)=r1;
    r0=r1+r2;
    pm(CFFT_IMAG_INPUTpN2,i12)=r0;
    i4=r1;
    bitrev(i4,0);
    r0=i4;
    pm(CFFT_IREPM,i12)=r0;

    // ---- twiddle tables ----
    r0=reads(2);
    pm(CFFT_TWID_REAL,i12)=r0;
    r0=reads(3);
    pm(CFFT_TWID_IMAG,i12)=r0;

    // ---- real work buffer: base, base + 8, base + N/4 ----
    r1=reads(4);
    pm(CFFT_REAL_FFT,i12)=r1;
    r0=8;
    r0=r1+r0;
    pm(CFFT_REAL_FFTp8,i12)=r0;
    r0=r1+r4;
    pm(CFFT_REAL_FFTpN4,i12)=r0;

    // ---- imag work buffer: base, base + 8, base + N/4 ----
    r1=reads(5);
    pm(CFFT_IMAG_FFT,i12)=r1;
    r0=8;
    r0=r1+r0;
    pm(CFFT_IMAG_FFTp8,i12)=r0;
    r0=r1+r4;
    pm(CFFT_IMAG_FFTpN4,i12)=r0;

    // ---- bit-reversed N/2, stored as CFFT_BRMODIFY ----
    i4=r2;
    bitrev(i4,0);
    pm(CFFT_BRMODIFY,i12)=i4;

    exit;
_cfft_simd_init.end:
.ENDSEG;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -