⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 cr2fftnasm_outplace.asm

📁 快速FFT,汇编原程序
💻 ASM
📖 第 1 页 / 共 2 页
字号:
*  scaling purpose.
*  All the data are stored to output buffer after the division by 2, for scaling 
*  purpose.
*/

/*
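*  The three instructions below preload the input data into registers R2, R3 and R4
*  from bit-reversed locations; R5 is loaded by the first instruction of the loop body.
*  The computation of the first stages (labels Stage12_strt .. Stage12_end) is done
*  here. It is kept outside the general stage loop as an optimization.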
*/

		// Preload the first three input words into R2, R3 and R4. Every load is paired
		// with a bit-reversed post-modify of I0 by M0, so consecutive reads walk the
		// input in bit-reversed order. The fourth word (R5) is fetched by the first
		// instruction after Stage12_strt / ESC_12.
		I0 += M0 (BREV) 		|| R2 = [I0];   //R2 reads the data
 	    I0 += M0 (BREV) 		|| R3 = [I0];
		I0 += M0 (BREV) 		|| R4 = [I0];		

		// Skip the hardware loop when the loop count in P0 is zero (an unsigned
		// "<= 0" test is true only for 0). P0 is set up before this excerpt.
		CC = P0 <= 0 (IU);
		If CC Jump ESC_12;

		// Hardware loop, executed P0 times. Each iteration handles two groups of four
		// input words and writes eight result words through I2:
		//   group A = R2, R3, R4, R5   (computed first, stored while group B is computed)
		//   group B = R0, R1, R6, R7   (loaded while group A is computed)
		// All add/subtract pairs use the ASR option (results are scaled as described in
		// the header comment). While group B is stored, R2, R3 and R4 are reloaded for
		// the next iteration.
		lsetup(Stage12_strt, Stage12_end) LC0 = P0; 
	Stage12_strt:
		// Group A: two levels of paired add/subtract; group B is loaded in parallel.
		R2 = R2 +|+ R3,  R3 = R2 -|- R3 (ASR) 		|| I0 += M0 (BREV) 		|| R5 = [I0];
		R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO)	|| I0 += M0 (BREV) 		|| R0 = [I0];
		R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR)		|| I0 += M0 (BREV) 		|| R1 = [I0];
		R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR)		|| I0 += M0 (BREV) 		|| R6 = [I0];

		// Group B: same arithmetic on R0, R1, R6, R7; group A results go out via I2.
		R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR) 		|| R7 = [I0]			|| [I2++] = R2;
		R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO) 	|| I0 += M0 (BREV) 		|| [I2++] = R3;
		R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR)		|| R2 = [I0]			|| [I2++] = R4;
		R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR)		|| I0 += M0 (BREV)      || [I2++] = R5;

		// Store group B results and finish preloading R3, R4 for the next pass.
		R3 = [I0] 			||		[I2++] = R0;
 	    I0 += M0 (BREV)		||		[I2++] = R1;
		R4 = [I0]			||		[I2++] = R6;		
	Stage12_end:I0 += M0 (BREV)		||		[I2++] = R7;

	// Epilogue: one final pass with the same arithmetic as the loop body. No further
	// input is preloaded (NOPs fill the slots), and the last four results are stored
	// with plain sequential stores.
	ESC_12: 
		R2 = R2 +|+ R3,  R3 = R2 -|- R3 (ASR) 		|| I0 += M0 (BREV) 		|| R5 = [I0];
		R4 = R4 +|+ R5, R5 = R4 -|- R5 (ASR, CO)	|| I0 += M0 (BREV) 		|| R0 = [I0];
		R2 = R2 +|+ R4, R4 = R2 -|- R4 (ASR)		|| I0 += M0 (BREV) 		|| R1 = [I0];
		R5 = R3 +|- R5, R3 = R3 -|+ R5 (ASR)		|| I0 += M0 (BREV) 		|| R6 = [I0];

		R0 = R0 +|+ R1, R1 = R0 -|- R1 (ASR) 		|| [I2++] = R2			|| R7 = [I0];
		R6 = R6 +|+ R7, R7 = R6 -|- R7 (ASR, CO) 	|| [I2++] = R3			|| NOP;
		R0 = R0 +|+ R6, R6 = R0 -|- R6 (ASR)		|| [I2++] = R4			|| NOP;
		R7 = R1 +|- R7, R1 = R1 -|+ R7 (ASR)		|| [I2++] = R5			|| NOP;

		[I2++] = R0;
		[I2++] = R1;
 	    [I2++] = R6;
		[I2++] = R7;

/***************************************************************************************/

/*
*  Here the register values are set up for the execution of the middle stages. These stages
*  are skipped via the Esc_mid label if the input size is equal to 8.
*/


		// Register setup for the stages that follow the first pass: derive the
		// twiddle stride in R7, initialise the lines-per-butterfly count in P3, and
		// count how many middle stages are needed (R0).
		R1 = P1;                          //R1 = wst.
		R1 = R1 << 2;                     //R1 = wst * 4
		P3 = 4;                //P3 holds the number of lines in each butterfly at stage 3.
		R7 = P5;  
		// NOTE(review): only R7.L is written by the multiply below; R7.H keeps the
		// upper half of P5. Assumes P5 fits in 16 bits - confirm against the caller.
        R7.L = R1.L * R7.L (IS);    //R7 = wst * 4  * twiddle offset
		R2 = P4;  
		R3 = 8;
		CC = R2 == R3;           //If the input array size is equal to 8, go to the last stage, because
		If CC Jump Esc_mid;      //the middle stages do not occur.
		R0 = 0;        //Counter for number of stages.

	// Count the right shifts needed to bring R2 (copy of P4) down to 8.
	// NOTE(review): the loop exits only when R2 becomes exactly 8, i.e. when P4 is a
	// power of two greater than 8. For any other value R2 never equals 8 and the loop
	// does not terminate; presumably the caller guarantees a valid size - confirm.
	Find_m:            //The computation of number of stages is done here.
		R2 >>= 1;
		R0 += 1;
		CC = R2 == R3;
		If !CC Jump Find_m;  //R0 holds the value of m-3 and is never free

/************************* Implementation of Middle Part *******************************/

/*
* First, an outer loop over (number of stages - 3) is set up. It is a general implementation
* of the butterfly computation. The first nested loop runs over half the number of
* butterflies at each stage. The second nested loop runs over the number of lines in each
* butterfly. The computation is done on the output buffer. The output is stored after dividing
* by 2 for scaling purposes. In one loop iteration the data of two butterflies are read and processed.
*
* The input is read from the output buffer and, after the computation at this stage, the output
* is written back to the output buffer.
*/
	
// One pass of Loopfor_m computes one middle stage. Data is read through I2/I3
// (both based at B2) and written through I0 (based at B0); B0 and B2 are swapped
// at the end of each pass so the next stage reads what this stage wrote.
//B2 Input, B0 output.	
	Loopfor_m:
		I0 = B0;
		I2 = B2;
		I3 = B2;             //Second read pointer; moved M2 bytes ahead of I2 below.
		P0 = P3 << 2; 
		M2 = P0;             //M2 holds the offset of counterpart line.
		P0 += -4;
		M0 = P0;             //M0 = M2 - 4, used to step I0 back after the paired store.
	
		P5 = P5 >> 1;        //Halve the outer hardware-loop count for this stage.
		R7 = R7 >>> 1 || I3 += M2 || NOP;   //Halve the twiddle stride; I3 -> counterpart line.
 	    M1 = R7;             //M1 = twiddle stride used by [I1++M1].
		P3 += -1;            //Inner loop runs P3-1 times; the last line is finished after it.
		lsetup(Loop1_strt, Loop1_end) LC0 = P5;  //Loop is set for number of the butterfly
	Loop1_strt:                         			
		I1 = B3;				//Address of twiddle factor.
		// Prime the pipeline: first data word from each half and the first twiddle.
		R2 = [I2++];
		R3 = [I1++M1]  || R4 = [I3++];

		lsetup(Loop2_strt, Loop2_end) LC1 = P3;    //Loop is set for the number of lines
	Loop2_strt:
		// R5/R6 = halved sum/difference of the current pair; fetch the next twiddle
		// (R3) and the next counterpart word (R4).
		R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) 							|| R3 = [I1++M1] || R4 = [I3++];   //R3 = [P0++P2]; 
		// Multiply the new R4 by the twiddle R3 using both MACs:
		//   R4.L = R3.L*R4.L - R3.H*R4.H,  R4.H = R3.L*R4.H + R3.H*R4.L
		// (a complex product if the low half holds the real part and the high half
		// the imaginary part - presumably the data layout; confirm).
		// The sum is stored at [I0] (then I0 += M2), the difference in the next slot.
		A1 = R3.L * R4.H, A0 = R3.L * R4.L 								|| [I0++M2] = R5 || R2 = [I2++]; 
	Loop2_end:R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H) 	|| I0 -= M0				|| [I0] = R6;

		// Drain the pipeline: combine and store the last pair of this butterfly, and
		// advance I2 and I3 by M2 to skip the half already consumed by the other pointer.
		R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) 							|| I2 += M2				|| NOP;
		I3 += M2  || [I0++M2] = R5;   
	Loop1_end: [I0++] = R6;

		// Prepare the next stage: restore P3 and double it, decrement the stage
		// counter, and swap the B0/B2 buffer bases (B1 is used as the temporary).
		P3 += 1; 
		P3 = P3 << 1;
		R0 += -1;
		B1 = B0;
		B0 = B2;
		B2 = B1;
		CC = R0 == 0;
		If !CC Jump Loopfor_m;   //Loop for m.

/***************************************************************************************/

/************************* Implementation of Last Part *********************************/

/*
* This part implements the last stage of the butterfly. The label Esc_mid is used
* when the size of the input data is 8. In this case the computation of the middle stages
* has to be skipped. The increment in the twiddle factor offset is just 1. In the last stage
* there is only one butterfly. The loop is set for n/4. 4 data are read and processed at
* the same time.
*  
* The input is read from the output buffer and after the computation, the final output
* is written to output buffer.
*/

//B2 Input , B0 Output.

// Last stage. Uses the same pointer setup and pipelined inner loop as one butterfly
// of the middle stage, without an outer butterfly loop. Reads through I2/I3 (based
// at B2) and writes through I0 (based at B0). Reached either by falling through from
// the middle stages or directly from the size == 8 check.
Esc_mid:I0 = B0;
		I2 = B2;
		I3 = B2;             //Second read pointer; moved M2 bytes ahead of I2 below.
		P0 = P3 << 2; 
		M2 = P0;             //M2 holds the offset of counterpart line.
		P0 += -4;
		M0 = P0;             //M0 = M2 - 4, used to step I0 back after the paired store.
		R7 = R7 >>> 1 || I3 += M2 || NOP;   //Halve the twiddle stride; I3 -> counterpart line.
		M1 = R7;             //M1 = twiddle stride used by [I1++M1].
		P3 += -1;            //Loop runs P3-1 times; the last line is finished after it.
		I1 = B3;      //Address of twiddle factor.
		// Prime the pipeline: first data word from each half and the first twiddle.
		R2 = [I2++];
		R3 = [I1++M1]  || R4 = [I3++];

		lsetup(Last_strt, Last_end) LC1 = P3;    //Loop is set for the number of lines
	Last_strt:
		// Halved sum/difference of the current pair, then multiply the next
		// counterpart word by its twiddle:
		//   R4.L = R3.L*R4.L - R3.H*R4.H,  R4.H = R3.L*R4.H + R3.H*R4.L
		R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) 							|| R3 = [I1++M1]		|| R4 = [I3++];
		A1 = R3.L * R4.H, A0 = R3.L * R4.L 								|| [I0++M2] = R5		|| R2 = [I2++]; 
	Last_end:R4.H = (A1 += R3.H * R4.L), R4.L = ( A0 -= R3.H * R4.H) 	|| I0 -= M0				|| [I0] = R6;

		// Drain the pipeline: combine and store the final pair.
		R5 = R2 +|+ R4, R6 = R2 -|- R4 (ASR) 							|| NOP					|| NOP;
		[I0++M2] = R5;   
		[I0] = R6;

		// If the buffer the last stage wrote to (B0) is not the address held in M3
		// (presumably the caller's output buffer, saved before this excerpt - confirm),
		// copy P4 words from B0 to the address in M3. Otherwise go straight to Terminate.
		R0 = B0;
		R1 = M3;
		CC = R0 == R1;
		If CC Jump Terminate;
		I0 = B0;
		I1 = M3;
		R0 = [I0++];         //Preload the first word so the loop can store and load in parallel.
		// Single-instruction hardware loop: store the previously loaded word and load the next.
		// NOTE(review): because of the preload, the final iteration's load reads one
		// word past the P4 words being copied (the value is discarded).
		lsetup(Copy_strt, Copy_strt) LC0 = P4;
		Copy_strt: [I1++] = R0 || R0 = [I0++];

/***************************************************************************************/
	
	// Common exit: restore R7-R4 and P5-P3 from the stack (the matching push is
	// outside this excerpt) and return to the caller.
	Terminate:
		(R7:4, P5:3) = [SP++];           //Pop the registers before returning.
		RTS;                             //Return.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -