⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fft512pt.asm

📁 是一个TS201的fft源代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
/************************************************************************
	fft512pt.asm

	Prelim rev.	February 25, 2004 - more documentation to come	BL

	This is an assembly routine for the complex C-callable 512-point 16-bit FFT on
	the TigerSHARC family of DSPs.

	I. Description of Calling.

		1. Inputs:
			j4 -> input
			j5 -> ping_pong_buffer1
			j6 -> ping_pong_buffer2
			j7 -> output

		2. C-Calling Example:
			fft512pt(&(input), &(ping_pong_buffer1), &(ping_pong_buffer2), &(output));

		3. Limitations:
			a. All buffers must be aligned on a memory boundary which is a multiple of 4.
			b. Buffers input and ping_pong_buffer2 must be aligned on a memory boundary
			   which is a multiple of 64.
			c. If memory space savings are required and input does not have to be
			   preserved, ping_pong_buffer1 can be the same buffer as input with no
			   degradation in performance.
			d. If memory space savings are required, output can be the same buffer
			   as ping_pong_buffer2 with no degradation in performance.

		4. For the code to yield optimal performance, the following must be observed:
			a. Buffer input must have been cached previously. This is reasonable to
			   assume since any engine that would have brought the data into internal
			   memory, such as a DMA, would also have cached it.
			b. input and ping_pong_buffer2 must be located in different memory blocks.
			c. ping_pong_buffer1 and ping_pong_buffer2 must be located in different
			   memory blocks.
			d. ping_pong_buffer1 and output must be located in different memory blocks.
			e. twiddles and input must be located in different memory blocks.
			f. AdjustMatrix and ping_pong_buffer1 must be located in different memory
			   blocks.

	II. Description of the FFT algorithm.

		1. All data is treated as complex packed data.
		2. 
An application note will be provided for the description of the math of		   the algorithm.************************************************************************///************************* Includes ************************************//#include <defTS201.h>//***********************************************************************.section data2a;.align 4;                                                      			// allign to quad.var _AdjustMatrix[512] = "MatrixCoeffs.dat";.align 4;                                                       		// allign to quad.var _twiddles16[64] = "Twiddles16.dat";.var _dummy1[4];                                                        // Loop reads from this location on its exit, does not use the actual value.align 4;                                                       		// allign to quad.var _twiddles32[16] = "Twiddles32.dat";								// W32_1, W32_2,...,W32_15.var _dummy2[4];                                                        // Loop reads from this location on its exit, does not use the actual value.align 4;                                                       		// allign to quad.var _k_modifies[8] = 	0,   64,   -32,   32,						0,   32,   -16,   16;.align 4;                                                       		// allign to quad.var _n_508[4] = 508, 508, 508, 508;//**********************************************************************************************.section program;.global _fft512pt;//************************************** Start of code *****************************************_fft512pt://PROLOGUEJ26 = J27 - 64;				K26 = K27 - 64;;J27 = J27 - 28;  			K27 = K27 - 20;;q[J27 + 24] = XR27:24;		q[K27 + 16] = YR27:24;;q[J27 + 20] = XR31:28;		q[K27 + 12] = YR31:28;;//**********************************************************************************************//                                     VERTICAL 
FFTs//**********************************************************************************************//*************************************** Stage 1 **********************************************// From j0->_input to k3->_ping_pong_buffer2	j0=j31+j4;			k7:4=Q[k31+_k_modifies];;	j1=j0+256;			j8=64;;	j2=j0+128;			k3=j6;;	j3=j0+(256+128);;																																// ----------------------------------	r5:4  =br Q[j2+=64];jL0=508;;																										//| F1    |       |     |       |    |	r7:6  =br Q[j3+=j8]; r31=0x80000000;;																						//| F2    |       |     |       |    |	r1:0  =br Q[j0+=j8]; kL3:0=Q[k31+_n_508];;																							//| F3    |       |     |       |    |	r3:2  =br Q[j1+=j8]; kB3=k3+4;												sr13:12=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;		//| F4    |       |     | AS1   |    |																																// ----------------------------------	r5:4  =br Q[j2+=j8]; kB2=k31+_twiddles16;;																										//| F1+   |       |     |       |    |	r7:6  =br Q[j3+=j8]; j9=-92;						   mr1:0+=r14**r31(CR);	sr9:8=r1:0+r3:2,       sr11:10=r1:0-r3:2;;		//| F2+   | MPY1  |     | AS2   |    |	r1:0  =br Q[j0+=j8]; kL2=k31+64;			r24=mr1:0, mr1:0+=r15**r31(CR);;													//| F3+   | MPY2  | M1  |       |    |	r3:2  =br Q[j1+=j8]; LC1=8;					r25=mr1:0, mr1:0+=r15**r31(CR);	sr29:28=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;		//| F4+   |       | M2  | AS1+  |    |																																// ----------------------------------	r5:4  =br Q[j2+=j8]; jB0=kB3;												sr17:16=r9:8+r13:12,   sr21:20=r9:8-r13:12;;	//| F1++  |       |     | AS3   |    |.align_code 4;                                                      															// ----------------------------------_VerFFTStage1Inner:																												// ----------------------------------	r7:6  =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16;			   mr1:0+=r14**r31(CR);	sr9:8=r1:0+r3:2,       sr27:26=r1:0-r3:2;;	r1:0  =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20;	r24=mr1:0, mr1:0+=r15**r31(CR);	
sr19:18=r11:10+r25:24, sr23:22=r11:10-r25:24;;	r3:2  =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18;	r25=mr1:0, mr1:0+=r15**r31(CR);	sr13:12=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;																											// F4	r5:4  =   Q[j2+=j9]; cb Q[k3+=k7]=r23:22;									sr17:16=r9:8+r29:28,   sr21:20=r9:8-r29:28;;	r7:6  =   Q[j3+=j9]; cb Q[k3+=k5]=r17:16;			   mr1:0+=r14**r31(CR);	sr9:8=r1:0+r3:2,       sr11:10=r1:0-r3:2;;	r1:0  =   Q[j0+=j9]; cb Q[k3+=k6]=r21:20;	r24=mr1:0, mr1:0+=r15**r31(CR);	sr19:18=r27:26+r25:24, sr23:22=r27:26-r25:24;;	r3:2  =   Q[j1+=j9]; cb Q[k3+=k5]=r19:18;	r25=mr1:0, mr1:0+=r15**r31(CR);	sr29:28=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;																											// F4	r5:4  =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22;									sr17:16=r9:8+r13:12,   sr21:20=r9:8-r13:12;;	       				 k2=k31+(_twiddles16+2);;	r7:6  =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16;			   mr1:0+=r14**r31(CR);	sr9:8=r1:0+r3:2,       sr27:26=r1:0-r3:2;;	r1:0  =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20;	r24=mr1:0, mr1:0+=r15**r31(CR);	sr19:18=r11:10+r25:24, sr23:22=r11:10-r25:24;;	r3:2  =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18;	r25=mr1:0, mr1:0+=r15**r31(CR);	sr13:12=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;																											// F4	r5:4  =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22;									sr17:16=r9:8+r29:28,   sr21:20=r9:8-r29:28;;	r7:6  =br Q[j3+=j8]; cb Q[k3+=k5]=r17:16;			   mr1:0+=r14**r31(CR);	sr9:8=r1:0+r3:2,       sr11:10=r1:0-r3:2;;	r1:0  =br Q[j0+=j8]; cb Q[k3+=k6]=r21:20;	r24=mr1:0, mr1:0+=r15**r31(CR);	sr19:18=r27:26+r25:24, sr23:22=r27:26-r25:24;;	r3:2  =br Q[j1+=j8]; cb Q[k3+=k5]=r19:18;	r25=mr1:0, mr1:0+=r15**r31(CR);	sr29:28=r5:4+r7:6, 	   sr15:14=r5:4-r7:6;;																											// F4.align_code 4;	if NLC1E, jump _VerFFTStage1Inner;	r5:4  =br Q[j2+=j8]; cb Q[k3+=k7]=r23:22;									sr17:16=r9:8+r13:12,   sr21:20=r9:8-r13:12;;//***************************************** Stage 2 ***********************************************// From j0->_ping_pong_buffer2 to k1->_ping_pong_buffer1.align_code 4;	j0=j6+12*32;			j1=-4*32;;	
r7:6  =   Q[j0+=-4*32];	r31:30=   L[k2+=-2];;																										//| F1    |       |     |       |    |	r5:4  =cb Q[j0+=-4*32];	r29:28=cb L[k2+=6];				mr1:0+=r7**r31(CR);;	r3:2  =   Q[j0+=-4*32];	LC0=15;				r15=mr1:0, 	mr1:0+=r6**r31(CR);;													//| F3+   | MPY2  | M1  |       |    |	r1:0  =cb Q[j0+=28*32];	j2=28*32;			r14=mr1:0, 	mr1:0+=r5**r30(CR);;													//| F3+   | MPY2  | M1  |       |    |	r7:6  =cb Q[j0+=-4*32];	k1=j5;				r13=mr1:0, 	mr1:0+=r4**r30(CR);;													//| F3+   | MPY2  | M1  |       |    |	r5:4  =cb Q[j0+=j1];						r12=mr1:0, 	mr1:0+=r3**r29(CR);;													//| F3+   | MPY2  | M1  |       |    |												r11=mr1:0, 	mr1:0+=r2**r29(CR);;													//| F3+   | MPY2  | M1  |       |    |												r10=mr1:0, 	mr1:0+=r1**r28(CR);;													//| F3+   | MPY2  | M1  |       |    |	r3:2  =   Q[j0+=j1];						r9=mr1:0, 	mr1:0+=r0**r28(CR);;													//| F3+   | MPY2  | M1  |       |    |	r1:0  =cb Q[j0+=28*32];						r8=mr1:0, 	mr1:0+=r7**r31(CR);	sr21:20=r13:12+r15:14,	sr23:22=r13:12-r15:14;;	// AS1												r15=mr1:0, 	mr1:0+=r6**r31(CR);;							kB1=k1+4;			r14=mr1:0, 	mr1:0+=r5**r30(CR);	sr17:16=r9:8  +r11:10,	sr19:18=r9:8  -r11:10;;	// AS2	r7:6  =cb Q[j0+=j1];	r8=r23;				r13=mr1:0, 	mr1:0+=r4**r30(CR);	sr9=-r23;;										// A2	r5:4  =cb Q[j0+=j1];	r31:30=cb L[k2+=-2];r12=mr1:0, 	mr1:0+=r3**r29(CR);	sr23=-r22;;										// A1												r11=mr1:0, 	mr1:0+=r2**r29(CR);	lr9:8=rot r9:8 by -16;;							// R2							k6=k31+4*32;		r10=mr1:0, 	mr1:0+=r1**r28(CR);	lr23:22=rot r23:22 by -16;;						// R1.align_code 4;                                                       															// ----------------------------------_VerFFTStage2:																													// ----------------------------------	r3:2  =   Q[j0+=j1];	r23=r8;				r9=mr1:0, 	mr1:0+=r0**r28(CR);	sr17:16=r17:16+r21:20,	sr21:20=r17:16-r21:20;;	// AS3	r1:0  =cb Q[j0+=j2];	r29:28=cb L[k2+=6]; r8=mr1:0, 	mr1:0+=r7**r31(CR);	sr25:24=r13:12+r15:14,	
sr27:26=r13:12-r15:14;;	// AS1+	cb Q[k1+=k6]=r17:16;						r15=mr1:0, 	mr1:0+=r6**r31(CR);	sr19:18=r19:18+r23:22,	sr23:22=r19:18-r23:22;;	// AS4	cb Q[k1+=k6]=r19:18;						r14=mr1:0, 	mr1:0+=r5**r30(CR);	sr17:16=r9:8  +r11:10,	sr19:18=r9:8  -r11:10;;	// AS2+	r7:6  =cb Q[j0+=j1];	r8=r27;				r13=mr1:0, 	mr1:0+=r4**r30(CR);	sr9=-r27;;										// A2+	r5:4  =cb Q[j0+=j1];						r12=mr1:0, 	mr1:0+=r3**r29(CR);	sr27=-r26;;										// A1+	cb Q[k1+=k6]=r21:20;						r11=mr1:0, 	mr1:0+=r2**r29(CR);	lr9:8=rot r9:8 by -16;;							// R2+	cb Q[k1+=k6]=r23:22;						r10=mr1:0, 	mr1:0+=r1**r28(CR);	lr27:26=rot r27:26 by -16;;						// R1+	r3:2  =   Q[j0+=j1];	r27=r8;				r9=mr1:0, 	mr1:0+=r0**r28(CR);	sr17:16=r17:16+r25:24,	sr25:24=r17:16-r25:24;;	// AS3	r1:0  =cb Q[j0+=j2];						r8=mr1:0, 	mr1:0+=r7**r31(CR);	sr21:20=r13:12+r15:14,	sr23:22=r13:12-r15:14;;	// AS1	cb Q[k1+=k6]=r17:16;						r15=mr1:0, 	mr1:0+=r6**r31(CR);	sr19:18=r19:18+r27:26,	sr27:26=r19:18-r27:26;;	// AS4+	cb Q[k1+=k6]=r19:18;						r14=mr1:0, 	mr1:0+=r5**r30(CR);	sr17:16=r9:8  +r11:10,	sr19:18=r9:8  -r11:10;;	// AS2	r7:6  =cb Q[j0+=j1];	r8=r23;				r13=mr1:0, 	mr1:0+=r4**r30(CR);	sr9=-r23;;										// A2	r5:4  =cb Q[j0+=j1];	r31:30=cb L[k2+=-2];r12=mr1:0, 	mr1:0+=r3**r29(CR);	sr23=-r22;;										// A1	cb Q[k1+=k6]=r25:24;						r11=mr1:0, 	mr1:0+=r2**r29(CR);	lr9:8=rot r9:8 by -16;;							// R2.align_code 4;                                                       															// ----------------------------------	if NLC0E, jump _VerFFTStage2;	                     																		// ----------------------------------	cb Q[k1+=k6]=r27:26;						r10=mr1:0, 	mr1:0+=r1**r28(CR);	lr23:22=rot r23:22 by -16;;						// R1.align_code 4;	jB0=j5+4;				r23=r8;				r9=mr1:0, 	mr1:0+=r0**r28(CR);	sr17:16=r17:16+r21:20,	sr21:20=r17:16-r21:20;;	// AS3	j0=j31+j5;             	j1=j6;				r8=mr1:0, 	mr1:0+=r7**r31(CR);	sr25:24=r13:12+r15:14,	sr27:26=r13:12-r15:14;;	// AS1+	cb Q[k1+=k6]=r17:16;    kL2=508;									        
sr19:18=r19:18+r23:22,	sr23:22=r19:18-r23:22;;	// AS4	cb Q[k1+=k6]=r19:18;	kB2=_AdjustMatrix+4;								sr17:16=r9:8  +r11:10,	sr19:18=r9:8  -r11:10;;	// AS2+	cb Q[k1+=k6]=r21:20;;	k2=k31+_AdjustMatrix;	r8=r27;												sr9=-r27;;										// A2+	cb Q[k1+=k6]=r23:22;														sr27=-r26;;	r1:0=   Q[j0+=32];  	r29:28=cb Q[k2+=32];								lr9:8=rot r9:8 by -16;;							// R2+	r3:2=   Q[j0+=32];  	r31:30=cb Q[k2+=32];								lr27:26=rot r27:26 by -16;;						// R1+							r27=r8;												sr17:16=r17:16+r25:24,	sr25:24=r17:16-r25:24;;	// AS3+//*************************************** MPY/Xpose **********************************************// From j0->ping_pong_buffer1 to j1->ping_pong_buffer2	r5:4=cb Q[j0+=32];  	r21:20=cb Q[k2+=32];				mr1:0+=r0**r28(CR);;	r7:6=cb Q[j0+=32];  	r23:22=cb Q[k2+=32];	r8=mr1:0,  	mr1:0+=r1**r29(CR);;	cb Q[k1+=k6]=r17:16;							r12=mr1:0, 	mr1:0+=r2**r30(CR);	sr19:18=r19:18+r27:26,	sr27:26=r19:18-r27:26;;	// AS4+	cb Q[k1+=k6]=r19:18;           					r9=mr1:0,  	mr1:0+=r3**r31(CR);;	cb Q[k1+=k6]=r25:24;	       					r13=mr1:0, 	mr1:0+=r4**r20(CR);;	cb Q[k1+=k6]=r27:26;	LC1=8; 					r10=mr1:0, 	mr1:0+=r5**r21(CR);;.align_code 4;_MultXposeLoopOuter:	r1:0=cb Q[j0+=32];  	r17:16=cb Q[k2+=32];	r14=mr1:0, mr1:0+=r6**r22(CR);;	r3:2=cb Q[j0+=32];  	r19:18=cb Q[k2+=32];	r11=mr1:0, mr1:0+=r7**r23(CR);;	r5:4=cb Q[j0+=32];  	r21:20=cb Q[k2+=32];	r15=mr1:0, mr1:0+=r0**r16(CR);;	r7:6=cb Q[j0+=32];  	r23:22=cb Q[k2+=32];	r24=mr1:0, mr1:0+=r1**r17(CR);;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -