⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft.asm

📁 在ADSP-2126x上编写的优化过的FFT程序(用c和汇编编写)。
💻 ASM
📖 第 1 页 / 共 2 页
字号:
			r6 = r1 + 1, m3 = r1;
			r6 = r6 + 1, r0 = m2;
			r0 = lshift r0 by -2;
			r2 = r0 - 1, m11 = r1;
			r0 = r0 + r0, i5 = i4;
			r2 = r2 - 1, m2 = r0;
			m10 = r0;
			i13 = i12;

			f4=dm(i1,m2), f5=pm(i9,m10);										/* real twiddle, imag twiddle */
			f1=dm(i5,m4), f0=pm(i13,m12);

			f10=f1*f4, m0 = r6;						
			f12=f0*f5, m8 = r6;
			f11=f1*f5, f9=dm(i5,m3), f8=pm(i13,m11);
			f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);

			f10=f1*f4, f15=f11+f13, ustat1 = r3;						
			f12=f0*f5, f6=f8+f15, f2=f8-f15, ustat3 = r2;

			lcntr = ustat1, do _mid_loop until lce;
		
				lcntr = ustat3, do _in_loop until lce;

					f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
					f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);
					f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;						
_in_loop:			f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;
			
				f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m0), f8=pm(i13,m8);
				f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);

				f4=dm(i1,m2), f5=pm(i9,m10);									/* real twiddle, imag twiddle */

				f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;						
				f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;

				f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
				f13=f0*f4, f14=f10-f12, f1=dm(i5,m4), f0=pm(i13,m12);

				f10=f1*f4, f15=f11+f13, dm(i4,m4)=f3, pm(i12,m12)=f2;						
_mid_loop:		f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m0)=f7, pm(i12,m8)=f6;

			i1 = pm(CFFT_TWID_REAL,i14); // Pointer to cosine table
_out_loop:	i9 = pm(CFFT_TWID_IMAG,i14); // Pointer to sine table

/******************** 3rd To Last Stage *********************/
_end_2:
		i5 = pm(CFFT_REAL_FFTpN4,i14);	// Pointer to real_fft+N/4
		i13 = pm(CFFT_IMAG_FFTpN4,i14);	// Pointer to real_fft+N/4												
		m2 = 4;
		m10 = 4;
		r2 = pm(CFFT_mN4,i14);	// read -N/4
		r3 = r2 + 1, m3 = r2;
		r3 = r3 + 1, m11 = r2;

		f4=dm(i1,m2), f5=pm(i9,m10);											/* real twiddle, imag twiddle */
		f1=dm(i5,m3), f0=pm(i13,m11);
		f10=f1*f4, m0 = r3;						
		f12=f0*f5, m8 = r3;
		f11=f1*f5, f9=dm(i5,m3), f8=pm(i13,m11);
		f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
		f10=f1*f4, f15=f11+f13;						
		f12=f0*f5, f6=f8+f15, f2=f8-f15;

		lcntr = m14, do _mid_loop3 until lce; // loop N/8 times
			
			f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m0), f8=pm(i13,m8);
			f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
			f4=dm(i1,m2), f5=pm(i9,m10);
			f10=f1*f4, f15=f11+f13, dm(i4,m3)=f3, pm(i12,m11)=f2;						
			f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m3)=f7, pm(i12,m11)=f6;
			f11=f1*f5, f7=f9+f14, f3=f9-f14, f9=dm(i5,m3), f8=pm(i13,m11);
			f13=f0*f4, f14=f10-f12, f1=dm(i5,m3), f0=pm(i13,m11);
			f10=f1*f4, f15=f11+f13, dm(i4,m3)=f3, pm(i12,m11)=f2;
_mid_loop3:	f12=f0*f5, f6=f8+f15, f2=f8-f15, dm(i4,m0)=f7, pm(i12,m8)=f6;
			
/******************** Pre-Last Stage *********************/
_end_3:
		r2 = pm(CFFT_N2p2,i14);	// read N/2 + 2;
		m2 = pm(CFFT_mN2,i14);	// read -N/2;
		m10 = m2;
		i1 = b1;
		i9 = b9;

		f1=dm(i4,m2), f0=pm(i12,m10);
		f4=dm(i1,m1), f5=pm(i9,m9);												/* real twiddle, imag twiddle */
		f10=f1*f4, m3 = r2;						
		f12=f0*f5, m11 = r2;
		f11=f1*f5, f9=dm(i4,m3), f8=pm(i12,m11);
		f13=f0*f4, f14=f10-f12, f1=dm(i5,m2), f0=pm(i13,m10);
		f10=f1*f5, f15=f11+f13, i2=i5;							
		f12=f0*f4, f7=f9+f14, f3=f9-f14, i10=i13;
		
		lcntr = m14, do _mid_loop4 until lce;	// loop N/8 times
		
			f11=f1*f4, f6=f8+f15, f2=f8-f15, f9=dm(i5,m3), f8=pm(i13,m11);
			f13=f0*f5, f14=f10+f12, f1=dm(i4,m2), f0=pm(i12,m10);
			f4=dm(i1,m1), f5=pm(i9,m9);											/* real twiddle, imag twiddle */
			f10=f1*f4, f15=f11-f13, dm(i3,m2)=f7, pm(i11,m10)=f6;				/* cmult (f1,f0)*(f4,f5) */	
			f12=f0*f5, f7=f9+f14, f3=f9-f14, dm(i3,m3)=f3, pm(i11,m11)=f2;
			f11=f1*f5, f2=f8+f15, f6=f8-f15, f9=dm(i4,m3), f8=pm(i12,m11);
			f13=f0*f4, f14=f10-f12, f1=dm(i5,m2), f0=pm(i13,m10);
			f10=f1*f5, f15=f11+f13, dm(i2,m2)=f7, pm(i10,m10)=f6;				/* cmult (f1,f0)*(f4,f5) */	
_mid_loop4:	f12=f0*f4, f7=f9+f14, f3=f9-f14, dm(i2,m3)=f3, pm(i10,m11)=f2;
			
/******************** Last Stage ***********************/
_end_4:
		bit clr MODE1 BDCST1 | BDCST9;		
		
		b2 = pm(CFFT_REAL_INPUT,i14);		// read pointer to real_input
		b4 = pm(CFFT_REAL_INPUTpN2,i14);	// read pointer to real_input + N/2
		b10 = pm(CFFT_IMAG_INPUT,i14);		// read pointer to imag_input
		b12 = pm(CFFT_IMAG_INPUTpN2,i14);	// read pointer to imag_input + N/2
		r1 = -1;
		f15 = 0.0;		


		r0 = pm(CFFT_REAL_FFT,i14);			// read pointer to real_fft
		r0 = lshift r0 by r1, i1 = b1;
		r0 = r0 + 1, b3 = r0;
		b11 = r0;
		r0 = pm(CFFT_IMAG_FFT,i14);			// read pointer to imag_fft
		r0 = lshift r0 by r1, i9 = b9;
		r0 = r0 + 1, b5 = r0;
		b13 = r0;

		f4=dm(i1,m1), f5=pm(i9,m9);
		f0=dm(i3,m1), f8=pm(i13,m9);
		f8=dm(i5,m1), f0=pm(i11,m9);
		f10=f1*f5, f2=f9+f15; 
		f12=f2*f4;	
		f10=f1*f4, f13=f10+f12;
		f12=f2*f5, f6=f8+f13, f7=f8-f13;
		f14=f10-f12, f4=dm(i1,m1), f5=pm(i9,m9);
		f11=f0+f14, f3=f0-f14, f8=dm(i5,m1), f0=pm(i11,m9);	
		r14=pm(CFFT_N4m1,i14);
		lcntr = r14, do _last_stage until lce;	// loop N/4-1 times

			f0=dm(i3,m1), f8=pm(i13,m9);
			f10=f1*f5, f2=f9+f15, dm(i4,m1)=f3, pm(i12,m9)=f7; 
			f12=f2*f4, dm(i2,m1)=f11, pm(i10,m9)=f6;								
			f10=f1*f4, f13=f10+f12;
			f12=f2*f5, f6=f8+f13, f7=f8-f13;								
			f14=f10-f12, f4=dm(i1,m1), f5=pm(i9,m9);
_last_stage:f11=f0+f14, f3=f0-f14, f8=dm(i5,m1), f0=pm(i11,m9);	
			
		dm(i2,m1)=f11, pm(i10,m9)=f6;
		mode1=dm(SaveStack+0);			// restore mode1 register
		dm(i4,m1)=f3, pm(i12,m9)=f7;	// let mode1 settle before we use the stack	
		exit;
_cfft_simd.end:

/*******************************************************************************
*
* Function:  _cfft_simd_init - Initialization for _cfft_simd
*
* Synopsis:  #include "cfft_simd.h"
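*            void cfft_simd_init(Tcfft_simd pm *pfft, Tcfft_simd_size N,
*                                float *reinput, float *iminput,
*                                float *retwid, float *imtwid,
*                                float *refft, float *imfft);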
*
*
// prototype		
//==================================================================================
//void cfft_simd_init(Tcfft_simd pm *pfft,	// pointer to fft object
//						  Tcfft_simd_size N,			// size of fft, must be a power of 2
//						  float *reinput,		// pointer to real input data of size N
//						  float *iminput,		// pointer to imag input data of size N
//						  float *retwid,		// pointer to real twiddle array of size N/2
//						  float *imtwid,		// pointer to imag twiddle array of size N/2
//						  float *refft,		// pointer to real temp array of size N
//						  float *imfft);		// pointer to imag temp array of size N
//==================================================================================
* Description:  
*				This function is used to initialize or construct a cfft_simd object.  
*				It takes the parameters that are passed to it and stores them in a 
*				structure of type Tcfft_simd defined in the file cfft_simd.h.
*				The function _cfft_simd is passed a pointer to this structure when it
*				is called.  Thus all information needed to compute an fft is stored 
*				in the structure.
*
*				This function only needs to be called once during the prolog of the 
*				program that it is used in.  After that, all that is needed to execute
*				the FFT is the pointer to the structure mentioned above.
*
*				If more than one FFT needs to be computed, a Tcfft_simd structure needs to
*				be created for each FFT.  Twiddle factors can be shared between FFTs if
*				the FFTs are the same size, but the input and intermediate arrays
*				need to be unique to each FFT.
* Caution:
*				The object of type Tcfft_simd must be stored in a PM memory block.
*
* Author:	Darrel Judd
*				Judd Labs, Inc.
*				801-756-2057
*				drjudd@ieee.org
*
* Revisions:
*				Created June, 2003 - Darrel Judd
************************************************************************************/
.extern _cosf_sinf_simd;
.global _cfft_simd_init;
/*
 * _cfft_simd_init
 *
 * Fills in every CFFT_* field of the fft object addressed by the first
 * argument.  Values consumed on entry, as used by the code below:
 *		r4          pointer to the fft object (all fields are written with pm())
 *		r8          N, the FFT size
 *		r12         pointer to the real input array
 *		reads(1)    pointer to the imag input array
 *		reads(2)    pointer to the real twiddle array
 *		reads(3)    pointer to the imag twiddle array
 *		reads(4)    pointer to the real work array (real_fft)
 *		reads(5)    pointer to the imag work array (imag_fft)
 * Registers written: r0, r1, r2, r4, i4, i12.
 */
_cfft_simd_init:
	i12 = r4;							// i12 addresses the fft object from here on

	// ---- caller-supplied buffer pointers, stored unchanged ----
	pm(CFFT_REAL_INPUT,i12) = r12;
	r4 = reads(1);						// r4 = imag input pointer (kept for later sums)
	pm(CFFT_IMAG_INPUT,i12) = r4;
	r0 = reads(2);
	pm(CFFT_TWID_REAL,i12) = r0;
	r0 = reads(3);
	pm(CFFT_TWID_IMAG,i12) = r0;
	r1 = reads(4);						// r1 = real_fft (kept for later sums)
	pm(CFFT_REAL_FFT,i12) = r1;
	r2 = reads(5);						// r2 = imag_fft (kept for later sums)
	pm(CFFT_IMAG_FFT,i12) = r2;

	// ---- bit-reversed copies of the two input pointers ----
	i4 = r12;
	bitrev(i4,0);
	pm(CFFT_IREDM,i12) = i4;			// bitrev(real input pointer)
	i4 = r4;
	bitrev(i4,0);
	pm(CFFT_IREPM,i12) = i4;			// bitrev(imag input pointer)

	// ---- work-array addresses advanced by 8 ----
	r0 = 8;
	r0 = r1 + r0;
	pm(CFFT_REAL_FFTp8,i12) = r0;		// real_fft + 8
	r0 = 8;
	r0 = r2 + r0;
	pm(CFFT_IMAG_FFTp8,i12) = r0;		// imag_fft + 8

	// ---- everything derived from N/4 ----
	r0 = lshift r8 by -2;				// r0 = N/4
	pm(CFFT_N4,i12) = r0;
	r1 = r1 + r0;
	pm(CFFT_REAL_FFTpN4,i12) = r1;		// real_fft + N/4
	r2 = r2 + r0;
	pm(CFFT_IMAG_FFTpN4,i12) = r2;		// imag_fft + N/4
	r1 = -r0;
	pm(CFFT_mN4,i12) = r1;				// -N/4
	r0 = r0 - 1;
	pm(CFFT_N4m1,i12) = r0;				// N/4 - 1

	// ---- everything derived from N/2 ----
	r0 = lshift r8 by -1;				// r0 = N/2
	pm(CFFT_N2,i12) = r0;
	r1 = r12 + r0;
	pm(CFFT_REAL_INPUTpN2,i12) = r1;	// real input + N/2
	r1 = r4 + r0;
	pm(CFFT_IMAG_INPUTpN2,i12) = r1;	// imag input + N/2
	r1 = -r0;
	pm(CFFT_mN2,i12) = r1;				// -N/2
	i4 = r0;
	bitrev(i4,0);
	pm(CFFT_BRMODIFY,i12) = i4;			// bitrev(N/2)
	r1 = 2;
	r0 = r0 + r1;
	pm(CFFT_N2p2,i12) = r0;				// N/2 + 2

	// ---- N itself, N/8 and the stage count ----
	pm(CFFT_N,i12) = r8;
	r0 = lshift r8 by -3;
	pm(CFFT_N8,i12) = r0;				// N/8
	f1 = float r8;
	r1 = logb f1;						// exponent of float(N): the number of stages
	r0 = 5;
	r1 = r1 - r0;
	pm(CFFT_STAGESm5,i12) = r1;			// STAGES - 5
	exit;
_cfft_simd_init.end:

.ENDSEG;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -