📄 iir32_emac.s

📁 freescale MAC DSP的算法库（FFT
💻 S
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
;************************************************************************
;*
;* Copyright:
;*	Freescale Semiconductor, INC. All Rights Reserved.  
;*  You are hereby granted a copyright license to use, modify, and
;*  distribute the SOFTWARE so long as this entire notice is
;*  retained without alteration in any modified and/or redistributed
;*  versions, and that such modified versions are clearly identified
;*  as such. No licenses are granted by implication, estoppel or
;*  otherwise under any patents or trademarks of Freescale Semiconductor, 
;*  Inc. This software is provided on an "AS IS" basis and without warranty.
;*
;*  To the maximum extent permitted by applicable law, FREESCALE 
;*  DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING 
;*  IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
;*  PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH REGARD TO THE 
;*  SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY 
;*  ACCOMPANYING WRITTEN MATERIALS.
;* 
;*  To the maximum extent permitted by applicable law, IN NO EVENT
;*  SHALL FREESCALE BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING 
;*  WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS 
;*  INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
;*  LOSS) ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.   
;* 
;*  Freescale assumes no responsibility for the maintenance and support
;*  of this software
;*********************************************
;* FILENAME: iir32_emac.s
;*
;* PURPOSE: IIR32 module source file, containing functions for allocating/deallocating 
;*	        the filter's data structures and for computing an Infinite Impulse
;*	        Response filter.
;*
;* AUTHOR: original code was written by Andrey Butok,
;*		   optimized for eMAC unit by Dmitriy Karpenko	        
;*********************************************

 .section .text       ; Place the code that follows in the ".text" section.
; The two preprocessor lines below are commented out, so emac.h is not
; pulled into this file.
;#define __EMAC_H
;#include "emac.h"

 .ALIGN 4             ; Align the code that follows. NOTE(review): whether 4 means bytes or a power of two depends on the assembler - confirm.
 .XDEF _IIR32_EMAC    ; Export the _IIR32_EMAC entry point so other modules can link to it.
 .extern _malloc      ; External symbol; not referenced in the part of the routine visible in this section.
 .extern _free        ; External symbol; not referenced in the part of the routine visible in this section.
 
 
 
;******************************************************
;* NAME: void IIR32_EMAC( struct tIirStruct *pIIR, Frac32* pX, Frac32* pY, unsigned int n)
;*
;* DESCRIPTION: Computes an Infinite Impulse Response (IIR) filter for an array of 32-bit fractional data values.
;******************************************************
;* a2          pIIR        - Pointer to a data structure containing private data for the iir filter
;* 68(a7)      pX          - Pointer to the input vector of n data elements
;* 72(a7)      pY          - Pointer to the output vector of n data elements
;* d2          k           - Counter for inner loop
;* d1          i           - Counter for outer loop
;* d0          N           - Length of coefficients vector(N<=n)
;* a0          pCurY       - Pointer to the current Y
;* a1          pCurX       - Pointer to the current X
;* a3          pCurCoef    - Pointer to the current coefficient
;* a4          pCurHistory - Pointer to the current element of history buffer
;* a5          pPredY      - Pointer to the previous Y
;******************************************************
_IIR32_EMAC:
;---=Saving values of used registers=---
 lea 		-60(a7),a7
 movem.l 	d0-d7/a0-a6,(a7)
;//Saving values of MAC status register
 lea		-4(a7),a7
 move.l		MACSR,d0
 move.l		d0,(a7)
 lea		4(a7),a7
 
;//defining the mode of MAC unit
;#ifdef __FRACT_M
move.l		#0x00000030,MACSR
;#else
;move.l 		#0x00000000,MACSR
;#endif
;---=Most useful parameters are moved from stack to registers.=--- 
 move.l 	72(a7),a0         ;  pCurY=pY;       -= Pointer to the current Y.=-
 move.l 	68(a7),a1         ;  pCurX=pX;       -= Pointer to the current X.=-
 move.l 	64(a7),a2         ;  N=pIIR->iIirCoefCount/2+1;
 move.l 	4(a2),d0
 lsr.l 		#1,d0
 addq.l 	#1,d0
;---====== Begin of getting Y[1]..Y[N] ======---
 move.l 	#0,ACC0           ;-=accumulators initialization=-
 move.l	 	#0,ACC1
 move.l 	#0,ACC2
 move.l 	#0,ACC3
 ;//computing a block of output samples from Y[1] to Y[N-N%4]
 moveq.l 	#4,d1          ;  for(i=4;i<=N;i+=4) { //Begin of outer loop #1
.FORi1:
 cmp.l 		d0,d1              ;  //Comparing i with N
 bhi 		.ENDFORi1          ;  //If (i>N) then jump to .ENDFORi1=-
 
 move.l 	68(a7),a6      	   ; pCurX=pX+i-4; //Current sample pointer initialization
 lea 		(-16,a6,d1.l*4),a1

 move.l 	(a2),a3        	   ; pCurCoef=pIIR->pIirCoef; //Current coefficient for input pointer initialization
 
 movem.l 	(a1),d3-d6     	   ;d3=*pCurX++; d4=*pCurX++; d5=*pCurX++; d6=*pCurX; pCurX-=3;
 move.l 	(a3)+,a6       	   ;a6=*pCurCoef++;	
   
 mac.l 		a6,d6,<<,-(a1),d6,ACC3  	;ACC3+=a6*d6; d6=*--pCurX; //getting next input sample
 mac.l 		a6,d5,<<,ACC2           	;ACC2+=a6*d5;
 mac.l 		a6,d4,<<,ACC1           	;ACC1+=a6*d4;
 mac.l 		a6,d3,<<,(a3)+,a6,ACC0  	;ACC0+=a6*d3; a6=*pCurCoef++;//getting next coefficient
 ;//cycle of multiplying 8 input samples on 4 coefficients per iteration
 move.l 	#4,d2          				    ;for(k=4; k<i; k+=4) { //Begin of inner loop #1

.FORk1:                            		
 cmp.l 		d1,d2                         	;//comparing k with i
 bcc  		.ENDFORk1                     	;//if (k>=i) then jump to .ENDFORk1
 
 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d5,<<,-(a1),d5,ACC3  	;ACC3+=a6*d5; d5=*--pCurX;
 mac.l 		a6,d4,<<,ACC2           	;ACC2+=a6*d4;
 mac.l 		a6,d3,<<,ACC1           	;ACC1+=a6*d3;
 mac.l 		a6,d6,<<,(a3)+,a6,ACC0  	;ACC0+=a6*d6; a6=*pCurCoef++;
 
 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d4,<<,-(a1),d4,ACC3  	;ACC3+=a6*d4; d4=*--pCurX;
 mac.l 		a6,d3,<<,ACC2           	;ACC2+=a6*d3;
 mac.l 		a6,d6,<<,ACC1           	;ACC1+=a6*d6;
 mac.l 		a6,d5,<<,(a3)+,a6,ACC0  	;ACC0+=a6*d5; a6=*pCurCoef++;
 
 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d3,<<,-(a1),d3,ACC3    	;ACC3+=a6*d3; d3=*--pCurX;
 mac.l 		a6,d6,<<,ACC2             	;ACC2+=a6*d6;
 mac.l 		a6,d5,<<,ACC1             	;ACC1+=a6*d5;
 mac.l 		a6,d4,<<,(a3)+,a6,ACC0    	;ACC0+=a6*d4; a6=*pCurCoef++;

 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d6,<<,-(a1),d6,ACC3  	;ACC3+=a6*d6; d6=*--pCurX;
 mac.l 		a6,d5,<<,ACC2           	;ACC2+=a6*d5;
 mac.l 		a6,d4,<<,ACC1           	;ACC1+=a6*d4;
 mac.l 		a6,d3,<<,(a3)+,a6,ACC0  	;ACC0+=a6*d3; a6=*pCurCoef++;

 addq.l 	#4,d2							;//incrementing k
 bra  		.FORk1							;//jumping to .FORk1
 
.ENDFORk1:									;} //end of inner loop #1
;//multiplying 3 first input samples on 3 coefficients
 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d3,<<,ACC1              	;ACC1+=a6*d3;
 mac.l 		a6,d4,<<,ACC2              	;ACC2+=a6*d4;
 mac.l 		a6,d5,<<,(a3)+,a6,ACC3     	;ACC3+=a6*d5; a6=*pCurCoef++;
 
 adda.l 	#4,a3                      		;pCurCoef++; //skip the coefficient for output sample
 
 mac.l 		a6,d3,<<,ACC2               ;ACC2+=a6*d3;
 mac.l 		a6,d4,<<,(a3)+,a6,ACC3      ;ACC3+=a6*d4; a6=*pCurCoef++;
 
 adda.l	 	#4,a3                      		;pCurCoef++; //skip the coefficient for output signal
 
 mac.l 		a6,d3,<<,ACC3               ;ACC3+=a6*d3;
 
 ;//Testing that history buffer is not empty => this is not the first calling of this subroutine
tst.l		12(a2)							;if (pIIR->iIirHistoryCount>0) { //if #1
beq			.ENDBUFy						;//if (pIIR->iIirHistoryCount==0) then jump to .ENDBUFy

move.l     	8(a2),a6         				;pCurX=pIIR->pIirHistory+i*2-7; //Current sample pointer initialization
lsl.l		#1,d1
lea 		(-28,a6,d1.l*4),a1
lsr.l		#1,d1

move.l 		(a2),a6           				;pCurCoef=pIIR->pIirCoef+N*2-2; //Current coefficient for input pointer initialization
lsl.l		#1,d0
lea			(-8, a6, d0.l*4), a3
lsr.l		#1,d0
 
move.l    	(a1)+,d3						;d3=*pCurX++;				
adda.l		#4,a1							;pCurX++; // skip the output sample from history buffer
move.l		(a1)+,d4						;d4=*pCurX++;
adda.l		#4,a1							;pCurX++; // skip the output sample from history buffer
move.l		(a1)+,d5						;d5=*pCurX++;
adda.l		#4,a1							;pCurX++; // skip the output sample from history buffer
move.l		(a1)+,d6						;d6=*pCurX++;
adda.l		#4,a1							;pCurX++; // skip the output sample from history buffer
move.l 		-(a3),a6          				;a6=*--pCurCoef;
suba.l		#4,a3							;pCurCoef--; //skip the coefficient for output sample
 
cmp.l		d0,d1							;if (N==i) { //if #2
bne			.CONT							;//if (N!=i) then jump to .CONT
;//multiplying 3 input samples from history buffer on 3 coefficients 
mac.l 		a6,d3,<<,ACC0               ;ACC0+=a6*d3;
mac.l 		a6,d4,<<,ACC1               ;ACC1+=a6*d4;
mac.l 		a6,d5,<<,-(a3),a6,ACC2      ;ACC2+=a6*d5; a6=*--pCurCoef;

suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
 
mac.l 		a6,d4,<<,ACC0               ;ACC0+=a6*d4;
mac.l 		a6,d5,<<,-(a3),a6,ACC1      ;ACC1+=a6*d5; a6=*--pCurCoef;
 
suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
 
mac.l 		a6,d5,<<,ACC0               ;ACC0+=a6*d5;
 
bra			.ENDBUFx						;//jump to .ENDBUFx
  											;} //end if #2
.CONT:										;if (N!=i) { //if #3
 
mac.l		a6,d3,<<,(a1)+,d3,ACC0		;ACC0+=a6*d3; d3=*pCurX++;
mac.l		a6,d4,<<,ACC1				;ACC1+=a6*d4;
mac.l		a6,d5,<<,ACC2				;ACC2+=a6*d5;
mac.l		a6,d6,<<,-(a3),a6,ACC3		;ACC3+=a6*d6; a6=*--pCurCoef;

move.l  	d1,d2             				
addq.l		#4,d2
;//cycle of multiplying 8 input samples from history buffer on 4 coefficients per iteration
.FORk11:                            		;for(k=i+4; k<N; k+=4) { //begin of inner loop #2
cmp.l 		d0,d2                   		;//comparing k with N=-
bcc  		.ENDFORk11                    	;//if (k>=N) then jump to .ENDFORk11
 
suba.l     	#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
mac.l		a6,d4,<<,(a1)+,d4,ACC0		;ACC0+=a6*d4; d4=*pCurX++;
mac.l		a6,d5,<<,ACC1				;ACC1+=a6*d5;
mac.l		a6,d6,<<,ACC2				;ACC2+=a6*d6;
mac.l		a6,d3,<<,-(a3),a6,ACC3		;ACC3+=a6*d3; a6=*--pCurCoef;

suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
mac.l		a6,d5,<<,(a1)+,d5,ACC0		;ACC0+=a6*d5; d5=*pCurX++;
mac.l		a6,d6,<<,ACC1				;ACC1+=a6*d6;
mac.l		a6,d3,<<,ACC2				;ACC2+=a6*d3;
mac.l		a6,d4,<<,-(a3),a6,ACC3		;ACC3+=a6*d4; a6=*--pCurCoef;
 
suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
mac.l		a6,d6,<<,(a1)+,d6,ACC0		;ACC0+=a6*d6; d6=*pCurX++;
mac.l		a6,d3,<<,ACC1				;ACC1+=a6*d3;
mac.l		a6,d4,<<,ACC2				;ACC2+=a6*d4;
mac.l		a6,d5,<<,-(a3),a6,ACC3		;ACC3+=a6*d5; a6=*--pCurCoef;

suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
mac.l		a6,d3,<<,(a1)+,d3,ACC0		;ACC0+=a6*d3; d3=*pCurX++;
mac.l		a6,d4,<<,ACC1				;ACC1+=a6*d4;
mac.l		a6,d5,<<,ACC2				;ACC2+=a6*d5;
mac.l		a6,d6,<<,-(a3),a6,ACC3		;ACC3+=a6*d6; a6=*--pCurCoef;

addq.l 		#4,d2							;//incrementing k
bra  		.FORk11							;//jumping to .FORk11
 
.ENDFORk11:									;} //end of inner loop #2
;//cycle of multiplying 4 input samples from history buffer on 1 coefficient per iteration
suba.l 		#4,a3                      		;pCurCoef--; //skip the coefficient for output sample
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
move.l		d0,d2							;//d2=(N-1)%4;
subq.l		#1,d2
andi.l		#3,d2
 
.FORk12:									;for(k=(N-1)%4; k>0; k--){//begin of inner loop #3=-
cmpi.l		#0,d2							;//comparing k with 0
beq			.ENDFORk12						;//if (k=0) then jump to .ENDFORk12
 
mac.l		a6,d4,<<,ACC0				;ACC0+=a6*d4;
mac.l		a6,d5,<<,ACC1				;ACC1+=a6*d5;
mac.l		a6,d6,<<,ACC2				;ACC2+=a6*d6;
mac.l		a6,d3,<<,-(a3),a6,ACC3		;ACC3+=a6*d3; a6=*--pCurCoef;
 
move.l		d5,d4							;d4=d5;
move.l		d6,d5							;d5=d6;
move.l		d3,d6							;d6=d3;
 
suba.l		#4,a3							;pCurCoef--; //skip the coefficient for output sample
move.l		(a1)+,d3						;d3=*pCurX++;
adda.l		#4,a1							;pCurX++; //skip the output sample from history buffer
 
subq.l		#1,d2							;//decrementing k
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -