📄 fir.asm

📁 关于DSP的参考实验成程序
💻 ASM
字号:
/*******************************************************************************************
Copyright(c) 2000 Analog Devices/Intel
Developed by JD(FRIO) Software Application Team, IPDC, Bangalore, India
********************************************************************************************
File Name      : fir_fr16.asm
Module Name    : Finite Impulse response filter 
Function Name  : __fir

Description    : This function performs FIR filter operation on given input.
Operands	   : R0- Address of input vector, R1-Address of output vector,
               : R2- Number of input elements	
			   : Filter structure is on stack.             

Prototype:
		void fir(const fract16 x[],fract16 y[],int n,fir_state_fr16 *s);
    
        x[]  -  input array 
		y[]  -  output array
		n    -  number of input samples(even)
        s    -  Structure of type fir_state_fr16:
	     	{
		    	fract16 *h, // filter coefficients 
			    fract16 *d, // start of delay line 
			    int k,	   	// no. of coefficients 
		    } fir_state_fr16;

Registers used   :   

	R0, R1, R2, R3, R3

	I0 -> Address of delay line (used for updating the delay line)
   	I1 -> Address of delay line (used for fetching the delay line data)
	I2 -> Address of filter coeff. h0, h1 , ... , hn-1
	I3 -> Address of output array y[]

	P0 -> Address of input array x[]
	P1, P2


Computation Time:
Total execution time for Number of Samples= Ni  & number of coefficients = Nc :

Kernal Cycle Count              :       (Ni/2){4 + ((Nc/2-1)*2)}  
Initialization                  :       36 + 5


For Ni=256 & Nc=8      Total execution time = 1321 cycles          

FIR filter code size :	146 bytes	
FIR filter core size :  44 bytes

Modified on 21.3.2001 for moving variable declaration to C and removing unncessary MNOPs.	

********************************************************************************************/
/*   Input buffer(in) , Output buffer(out), Delay line Buffer(delay) and filter coefficient
     buffer(h) are all aligned to 4 byte(word) boundary. The size of the delay line buffer
	 and coefficient buffer has to be 1 word more than the actual size if the number of
	 filter coeff. is odd.

*/


.section program;
.global    __fir;
.align 8;
__fir :		

			P0=[SP+12];		    // Address of the filter structure
			nop;nop;nop;
			P1=[P0++];			// Address of the filter coefficient array

			P2=[P0++];   		// Address of the delay line

			R3=[P0++];			// Number of filter coefficients

			B3=R1;				//Output buffer initialized as circular buffer
			I2=P1;				// Initialize I2 to the start of the filter coeff. array
			B2=P1;				// Filter coeff. array initialized as a circular buffer
			I0=P2;     			// start of the delay line write pointer 
			B0=P2;				// Delay line buffer is initialized as a circular buffer 	
			I1=P2;     			// start of the delay line read pointer
			B1=P2;				// Delay line buffer is initialized as a circular buffer 	

			I3=R1;
			P1=R2;	    		
			P2=R3;				

			R2=R2+R2;
			CC=BITTST(R3,0);	//Check if the number of filter taps is odd
			R3=R3+R3;			//As the filter coeff. are of fract16 (2 bytes)
			L2=R3;				//Initialize the filter coeff. length register
			P0=R0; 			    // Address of the input array   

			IF !CC JUMP FIR_CONTINUE (BP);
			R3+=2;				//Make the filter taps even			
			L2=R3;
			nop;nop;nop;
			I2-=2;              // Location where zero  has to be padded to coeff.  
			R0=0;
			W[I2++]=R0.L;		 //Set the last filter coeff. as zero to 
							     //force the number of filter taps even

FIR_CONTINUE:
			L0=R3;      		// Set the length of the delay line buffer
			L1=R3;      		// Set the length of the delay line buffer
			L3=R2;	

			P2+=-1;				//Nc-1 (Number of filter coefficients - 1)
			nop;nop;nop;
			I3-=4;				//Adjust the output pointer to the last location

			
/************************************************************************************************/												

				NOP ;                           // Align the instruction     
				I0+=4 || R2.H=W[I2--];	//Adjust the delay write pointer to X-n	
												//and coeff read pointer to h-n			
				R2.H=W[I2++] || R1=[I0];//Fetch hn-1. Fetch X-n and X-n+1
				R0=[P0++];		// Fetch X0 and X1


				LSETUP(E_FIR_START,E_FIR_END) LC0=P1>>1; //Loop 1 to Ni/2

E_FIR_START:	
				R1=PACK(R1.H,R0.H) || [I0++]=R0 || R2.L=W[I2++];
												//Store X1 into the lower half of R1.
												//Update the delay line.
												//Fetch h0 into lower half of R2
				LSETUP(E_MAC_ST,E_MAC_END)LC1=P2>>1;//Loop 1 to Nc/2 - 1
				A1=R2.L*R1.L, A0=R2.H*R1.H || R2.H=W[I2++] || [I3++]=R3;
												//A1=h0*X1, A0=hn-1*X-n+1.
												//Fetch h1 into upper half of R2.
												//Store the output.

E_MAC_ST:		// The 3 lines of code below can performed in parallel
				A1+=R0.L*R2.H,A0+=R0.L*R2.L || R2.L=W[I2++] || R0=[I1--];

				//A1+=R0.L*R2.H,A0+=R0.L*R2.L;
				//R2.L=W[I2++];					//A1+=X0*h1, A0+=X0*h0
				//R0=[I1--];						//Fetch filter coeff. h2 into the lower
												//half of R2. Fetch X-1 and X-2 into the
												//upper and lower half of R0 (for the 
												//first time in this loop)

E_MAC_END:		A1+=R0.H*R2.L,A0+=R0.H*R2.H  || R2.H=W[I2++] ;	//A1+=X-1*h2, A0+=X-1*h1
												//Fetch h3 into the upper half of R2.
												//(for the first time in this loop)		

E_FIR_END:		R3.H=(A1+=R0.L*R2.H),R3.L=(A0+=R0.L*R2.L)  || R0=[P0++] || R1=[I0]; 
									//A1+=X-n+2*hn-1, A0+=X-n+2*hn+2
									//Fetch the next pair of inputs (X2 and X3) into lower
									//and upper half of R0. Fetch X-n+2 and X-n+3 into R1

				[I3++]=R3;			//Store the final filtered output.

/*************************************************************************************************/
		  	L0=0;              // Clear the circular buffer initialization
			L1=0;
			L2=0;
			L3=0;
			RTS;				

__fir.end:
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -