📄 iir32_emac.s
字号:
;************************************************************************
;*
;* Copyright:
;* Freescale Semiconductor, INC. All Rights Reserved.
;* You are hereby granted a copyright license to use, modify, and
;* distribute the SOFTWARE so long as this entire notice is
;* retained without alteration in any modified and/or redistributed
;* versions, and that such modified versions are clearly identified
;* as such. No licenses are granted by implication, estoppel or
;* otherwise under any patents or trademarks of Freescale Semiconductor,
;* Inc. This software is provided on an "AS IS" basis and without warranty.
;*
;* To the maximum extent permitted by applicable law, FREESCALE
;* DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING
;* IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
;* PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH REGARD TO THE
;* SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY
;* ACCOMPANYING WRITTEN MATERIALS.
;*
;* To the maximum extent permitted by applicable law, IN NO EVENT
;* SHALL FREESCALE BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING
;* WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
;* INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
;* LOSS) ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
;*
;* Freescale assumes no responsibility for the maintenance and support
;* of this software
;*********************************************
;* FILENAME: iir32.s
;*
;* PURPOSE: IIR32 module source file, containing functions for allocating/deallocating
;* data structures for filter and computing an Infinite Impulse
;* Response filter.
;*
;* AUTHOR: original code was written by Andrey Butok,
;* optimized for eMAC unit by Dmitriy Karpenko
;*********************************************
.section .text ;-=Locate the code in the ".text" section.=-
;#define __EMAC_H
;#include "emac.h"
.ALIGN 4
.XDEF _IIR32_EMAC
.extern _malloc
.extern _free
;******************************************************
;* NAME: void IIR32( struct tIirStruct *pIIR, Frac32* pX, Frac32* pY, unsigned int n)
;*
;* DESCRIPTION: Computes an Infinite Impulse Response (IIR) filter for an array of 32-bit fractional data values.
;******************************************************
;* a2 pIIR - Pointer to a data structure containing private data for the iir filter
;* 68(a7) pX - Pointer to the input vector of n data elements
;* 72(a7) pY - Pointer to the output vector of n data elements
;* d2 k - Counter for inner loop
;* d1 i - Counter for outer loop
;* d0 N - Length of coefficients vector(N<=n)
;* a0 pCurY - Pointer to the current Y
;* a1 pCurX - Pointer to the current X
;* a3 pCurCoef - Pointer to the current coefficient
;* a4 pCurHistory - Pointer to the current element of history buffer
;* a5 pPredY - Pointer to the previous Y
;******************************************************
_IIR32_EMAC:
;---=Saving values of used registers=---
lea -60(a7),a7
movem.l d0-d7/a0-a6,(a7)
;//Saving values of MAC status register
lea -4(a7),a7
move.l MACSR,d0
move.l d0,(a7)
lea 4(a7),a7
;//defining the mode of MAC unit
;#ifdef __FRACT_M
move.l #0x00000030,MACSR
;#else
;move.l #0x00000000,MACSR
;#endif
;---=Most useful parameters are moved from stack to registers.=---
move.l 72(a7),a0 ; pCurY=pY; -= Pointer to the current Y.=-
move.l 68(a7),a1 ; pCurX=pX; -= Pointer to the current X.=-
move.l 64(a7),a2 ; N=pIIR->iIirCoefCount/2+1;
move.l 4(a2),d0
lsr.l #1,d0
addq.l #1,d0
;---====== Begin of getting Y[1]..Y[N] ======---
move.l #0,ACC0 ;-=accumulators initialization=-
move.l #0,ACC1
move.l #0,ACC2
move.l #0,ACC3
;//computing a block of output samples from Y[1] to Y[N-N%4]
moveq.l #4,d1 ; for(i=4;i<=N;i+=4) { //Begin of outer loop #1
.FORi1:
cmp.l d0,d1 ; //Comparing i with N
bhi .ENDFORi1 ; //If (i>N) then jump to .ENDFORi1=-
move.l 68(a7),a6 ; pCurX=pX+i-4; //Current sample pointer initialization
lea (-16,a6,d1.l*4),a1
move.l (a2),a3 ; pCurCoef=pIIR->pIirCoef; //Current coefficient for input pointer initialization
movem.l (a1),d3-d6 ;d3=*pCurX++; d4=*pCurX++; d5=*pCurX++; d6=*pCurX; pCurX-=3;
move.l (a3)+,a6 ;a6=*pCurCoef++;
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX; //getting next input sample
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;//getting next coefficient
;//cycle of multiplying 8 input samples on 4 coefficients per iteration
move.l #4,d2 ;for(k=4; k<i; k+=4) { //Begin of inner loop #1
.FORk1:
cmp.l d1,d2 ;//comparing k with i
bcc .ENDFORk1 ;//if (k>=i) then jump to .ENDFORk1
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d5,<<,-(a1),d5,ACC3 ;ACC3+=a6*d5; d5=*--pCurX;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d6,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d6; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d4,<<,-(a1),d4,ACC3 ;ACC3+=a6*d4; d4=*--pCurX;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d5,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d5; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d3,<<,-(a1),d3,ACC3 ;ACC3+=a6*d3; d3=*--pCurX;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d4,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d4; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d6,<<,-(a1),d6,ACC3 ;ACC3+=a6*d6; d6=*--pCurX;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d3,<<,(a3)+,a6,ACC0 ;ACC0+=a6*d3; a6=*pCurCoef++;
addq.l #4,d2 ;//incrementing k
bra .FORk1 ;//jumping to .FORk1
.ENDFORk1: ;} //end of inner loop #1
;//multiplying 3 first input samples on 3 coefficients
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d5,<<,(a3)+,a6,ACC3 ;ACC3+=a6*d5; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output sample
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d4,<<,(a3)+,a6,ACC3 ;ACC3+=a6*d4; a6=*pCurCoef++;
adda.l #4,a3 ;pCurCoef++; //skip the coefficient for output signal
mac.l a6,d3,<<,ACC3 ;ACC3+=a6*d3;
;//Testing that history buffer is not empty => this is not the first calling of this subroutine
tst.l 12(a2) ;if (pIIR->iIirHistoryCount>0) { //if #1
beq .ENDBUFy ;//if (pIIR->iIirHistoryCount==0) then jump to .ENDBUFy
move.l 8(a2),a6 ;pCurX=pIIR->pIirHistory+i*2-7; //Current sample pointer initialization
lsl.l #1,d1
lea (-28,a6,d1.l*4),a1
lsr.l #1,d1
move.l (a2),a6 ;pCurCoef=pIIR->pIirCoef+N*2-2; //Current coefficient for input pointer initialization
lsl.l #1,d0
lea (-8, a6, d0.l*4), a3
lsr.l #1,d0
move.l (a1)+,d3 ;d3=*pCurX++;
adda.l #4,a1 ;pCurX++; // skip the output sample from history buffer
move.l (a1)+,d4 ;d4=*pCurX++;
adda.l #4,a1 ;pCurX++; // skip the output sample from history buffer
move.l (a1)+,d5 ;d5=*pCurX++;
adda.l #4,a1 ;pCurX++; // skip the output sample from history buffer
move.l (a1)+,d6 ;d6=*pCurX++;
adda.l #4,a1 ;pCurX++; // skip the output sample from history buffer
move.l -(a3),a6 ;a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
cmp.l d0,d1 ;if (N==i) { //if #2
bne .CONT ;//if (N!=i) then jump to .CONT
;//multiplying 3 input samples from history buffer on 3 coefficients
mac.l a6,d3,<<,ACC0 ;ACC0+=a6*d3;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC2 ;ACC2+=a6*d5; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC1 ;ACC1+=a6*d5; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
mac.l a6,d5,<<,ACC0 ;ACC0+=a6*d5;
bra .ENDBUFx ;//jump to .ENDBUFx
;} //end if #2
.CONT: ;if (N!=i) { //if #3
mac.l a6,d3,<<,(a1)+,d3,ACC0 ;ACC0+=a6*d3; d3=*pCurX++;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
move.l d1,d2
addq.l #4,d2
;//cycle of multiplying 8 input samples from history buffer on 4 coefficients per iteration
.FORk11: ;for(k=i+4; k<N; k+=4) { //begin of inner loop #2
cmp.l d0,d2 ;//comparing k with N=-
bcc .ENDFORk11 ;//if (k>=N) then jump to .ENDFORk11
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
mac.l a6,d4,<<,(a1)+,d4,ACC0 ;ACC0+=a6*d4; d4=*pCurX++;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d3,<<,-(a3),a6,ACC3 ;ACC3+=a6*d3; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
mac.l a6,d5,<<,(a1)+,d5,ACC0 ;ACC0+=a6*d5; d5=*pCurX++;
mac.l a6,d6,<<,ACC1 ;ACC1+=a6*d6;
mac.l a6,d3,<<,ACC2 ;ACC2+=a6*d3;
mac.l a6,d4,<<,-(a3),a6,ACC3 ;ACC3+=a6*d4; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
mac.l a6,d6,<<,(a1)+,d6,ACC0 ;ACC0+=a6*d6; d6=*pCurX++;
mac.l a6,d3,<<,ACC1 ;ACC1+=a6*d3;
mac.l a6,d4,<<,ACC2 ;ACC2+=a6*d4;
mac.l a6,d5,<<,-(a3),a6,ACC3 ;ACC3+=a6*d5; a6=*--pCurCoef;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
mac.l a6,d3,<<,(a1)+,d3,ACC0 ;ACC0+=a6*d3; d3=*pCurX++;
mac.l a6,d4,<<,ACC1 ;ACC1+=a6*d4;
mac.l a6,d5,<<,ACC2 ;ACC2+=a6*d5;
mac.l a6,d6,<<,-(a3),a6,ACC3 ;ACC3+=a6*d6; a6=*--pCurCoef;
addq.l #4,d2 ;//incrementing k
bra .FORk11 ;//jumping to .FORk11
.ENDFORk11: ;} //end of inner loop #2
;//cycle of multiplying 4 input samples from history buffer on 1 coefficient per iteration
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
move.l d0,d2 ;//d2=(N-1)%4;
subq.l #1,d2
andi.l #3,d2
.FORk12: ;for(k=(N-1)%4; k>0; k--){//begin of inner loop #3=-
cmpi.l #0,d2 ;//comparing k with 0
beq .ENDFORk12 ;//if (k=0) then jump to .ENDFORk12
mac.l a6,d4,<<,ACC0 ;ACC0+=a6*d4;
mac.l a6,d5,<<,ACC1 ;ACC1+=a6*d5;
mac.l a6,d6,<<,ACC2 ;ACC2+=a6*d6;
mac.l a6,d3,<<,-(a3),a6,ACC3 ;ACC3+=a6*d3; a6=*--pCurCoef;
move.l d5,d4 ;d4=d5;
move.l d6,d5 ;d5=d6;
move.l d3,d6 ;d6=d3;
suba.l #4,a3 ;pCurCoef--; //skip the coefficient for output sample
move.l (a1)+,d3 ;d3=*pCurX++;
adda.l #4,a1 ;pCurX++; //skip the output sample from history buffer
subq.l #1,d2 ;//decrementing k
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -