📄 iir16_emac.s
字号:
;************************************************************************
;*
;* Copyright:
;* Freescale Semiconductor, INC. All Rights Reserved.
;* You are hereby granted a copyright license to use, modify, and
;* distribute the SOFTWARE so long as this entire notice is
;* retained without alteration in any modified and/or redistributed
;* versions, and that such modified versions are clearly identified
;* as such. No licenses are granted by implication, estoppel or
;* otherwise under any patents or trademarks of Freescale Semiconductor,
;* Inc. This software is provided on an "AS IS" basis and without warranty.
;*
;* To the maximum extent permitted by applicable law, FREESCALE
;* DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING
;* IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR
;* PURPOSE AND ANY WARRANTY AGAINST INFRINGEMENT WITH REGARD TO THE
;* SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY
;* ACCOMPANYING WRITTEN MATERIALS.
;*
;* To the maximum extent permitted by applicable law, IN NO EVENT
;* SHALL FREESCALE BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING
;* WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
;* INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
;* LOSS) ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
;*
;* Freescale assumes no responsibility for the maintenance and support
;* of this software
;*************************************************************************
;*
;* FILE NAME: iir16.s
;*
;* PURPOSE: IIR16 routines definition
;*
;* AUTHOR: Andrey Butok
;* IIR16 function optimized for eMAC by Igor Drozdinsky
;*
;***********************************************************************
;---=Module-level directives: section selection, MACSR constant, symbol declarations=---
.text
;--== The lines below once selected __FMAC_SR16 depending on __FRACT_M. ==--
;--== The conditional is commented out, so the 0x00000070 value is always used. ==--
;#define __EMAC_H
;#include "emac.h"
;#ifdef __FRACT_M
;--== __FMAC_SR16 is the value _IIR16_EMAC writes to MACSR after saving the old one ==--
;--== NOTE(review): the meaning of the individual bits is not shown here - confirm against the eMAC MACSR description ==--
#define __FMAC_SR16 0x00000070
;#else
;#define __FMAC_SR16 0x00000000
;#endif
;--== Entry point of this file, declared for use by other modules ==--
.XDEF _IIR16_EMAC
;--== External allocator symbols; neither is referenced in the visible part of this file ==--
.extern _malloc
.extern _free
;******************************************************
;* NAME: void IIR16( struct tIirStruct *pIIR, Frac16* pX, Frac16* pY, unsigned int n)
;*
;* DESCRIPTION: Computes an Infinite Impulse Response (IIR) filter for an array of 16-bit fractional data values.
;******************************************************
;* Used registers:
;* a2 pIIR - Pointer to a data structure containing private data for the IIR filter
;* d0, d1 iCurY0, iCurY1, iCurY2, iCurY3 - Current Y
;* d2, d3 iCurX0, iCurX1, iCurX2, iCurX3 - Current X
;* d4 iCurA, iCurB - Current coefficients
;* d5 j - inner loop counter
;* d7 i - outer loop counter
;* d6 tmp - saved value for inner loop counter calculation
;* a0 pX - Pointer to the current X for outer loop
;* a1 pY - Pointer to the current Y for outer loop
;* a4 pCurX - Pointer to the current X for inner loop
;* a4 pCurHistory - Pointer to the current element of history buffer
;* a5 pPredY - Pointer to the previous Y for inner loop
;* a3 pCurCoef - Pointer to the current coefficient
;* a6 pIirCoef - Pointer to the coefficients -> {a0,a1,b1,a2,b2...}
;* ACC0 iOut0 - Accumulator 0
;* ACC1 iOut1 - Accumulator 1
;* ACC2 iOut2 - Accumulator 2
;* ACC3 iOut3 - Accumulator 3
;******************************************************
_IIR16_EMAC:
;---=Saving values of used registers=---
lea -64(a7), a7
movem.l d0-d7/a0-a6, (a7)
;--== Saving old MAC status register to the stack ==--
move.l MACSR, d0
move.l d0, 60(a7)
;--== Loading new MAC status register ==--
move.l #__FMAC_SR16, d0
move.l d0, MACSR
;---=Most useful parameters are moved from stack to registers.=---
move.l 72(a7), a0 ;pX
move.l 76(a7), a1 ;pY
move.l 68(a7), a2 ;pIIR
move.l (a2), a6 ;pIirCoef = pIIR->pIirCoef
move.l #0, ACC0
move.l #0, ACC1
move.l #0, ACC2
move.l #0, ACC3
;---====== Begin of cycle of getting Y[1]..Y[N] (N = (pIIR->iIirCoefCount - 1) / 2)======---
move.l 4(a2), d7 ;
subq.l #1, d7 ;tmp = i = pIIR->iIirCoefCount - 1;
move.l d7, d6 ;
asr.l #3, d7
beq .EndD4Z ;if(i >> 3)
;{
;--==First N output samples computation==--
move.l d6, d7
subq.l #8, d7 ;i -= 8;
;--==Computation without using history buffer==--
.BegD4: ;do{
;--== Next input samples loading ==--
move.l (a0), d2 ;iCurX0 = pX[0]; iCurX1 = pX[1];
move.l 4(a0), d3 ;iCurX2 = pX[2]; iCurX3 = pX[3];
movea.l a0, a4 ;pCurX = pX;
lea 8(a0), a0 ;pX += 4;
;--== Make four previous output samples zero==--
clr.l d0 ;iCurY0 = iCurY1 = 0;
clr.l d1 ;iCurY2 = iCurY3 = 0;
movea.l a1, a5 ;pCurY = pY;
;--== Next coefficient loading ==--
move.l a6, a3 ;pCurCoef = pIirCoef;
move.l (a3), d4 ;iCurA = *pCurCoef;
lea 2(a3), a3 ;pCurCoef++;
;--== Input samples and first coefficient multiplications ==--
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 = iCurA * iCurX0;
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 = iCurA * iCurX1;
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 = iCurA * iCurX2;
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 = iCurA * iCurX3;
;--== The count of inner loops calculation ==--
move.l d6, d5 ;
sub.l d7, d5 ;j = tmp - i - 8;
subq.l #8, d5 ;
ble .EndIn1 ;while(j > 0)
;{
.ForIn1Beg:
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
.ForIn1:
;--== Three first input samples and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC1 ;iOut1 += iCurA * iCurX0;
mac.w d4.u, d2.l, <<, ACC2 ;iOut2 += iCurA * iCurX1;
mac.w d4.u, d3.u, <<, ACC3 ;iOut3 += iCurA * iCurX2;
;--== Three first output samples and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC1 ;iOut1 += iCurB * iCurY0;
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += iCurB * iCurY1;
mac.w d4.l, d1.u, <<, ACC3 ;iOut3 += iCurB * iCurY2;
;--== Loading two next input samples ==--
move.l d2, d3 ;iCurX3 = iCurX1; iCurX2 = iCurX0;
move.l -(a4), d2 ;iCurX1 = *--pCurX; iCurX0 = *--pCurX;
;--== Loading two next output samples ==--
move.l d0, d1 ;iCurY3 = iCurY1; iCurY2 = iCurY0;
move.l -(a5), d0 ;iCurY1 = *--pCurY; iCurY0 = *--pCurY;
;--== Fourth input sample and coefficient multiplication ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX1;
;--== Fourth output sample and coefficient multiplication ==--
mac.w d4.l, d0.l, <<, ACC0 ;iOut0 += iCurB * iCurY1;
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next four input sample and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 += iCurA * iCurX0;
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 += iCurA * iCurX1;
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 += iCurA * iCurX2;
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 += iCurA * iCurX3;
;--== Next four output sample and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC0 ;iOut0 += iCurB * iCurY0;
mac.w d4.l, d0.l, <<, ACC1 ;iOut1 += iCurB * iCurY1;
mac.w d4.l, d1.u, <<, ACC2 ;iOut2 += iCurB * iCurY2;
;--== Last mac instruction with next coefficients loading ==--
mac.w d4.l, d1.l, <<, (a3)+, d4, ACC3 ;iOut3 += iCurB * iCurY3;
;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
subq.l #4, d5 ; j -= 4;
bgt .ForIn1 ;} //while(j > 0)
.EndIn1C:
;--== Correcting pointer to coefficients after inner loop ==--
lea -4(a3), a3 ;pCurCoef -= 4
.EndIn1:
move.l 12(a2), d5 ;if(pIIR->iIirHistoryCount)
;---=Computation using history buffer==--
beq .BegNoHistory ;{
;--== The count of inner loops calculation ==--
move.l d7, d5 ;
addq.l #8, d5 ;j = i + 8;
ble .EndInH ;if(j > 0)
;{
;--== Setting curent history buffer pointer ==--
move.l 8(a2), a4 ;
adda.l d6, a4 ;
adda.l d6, a4 ;pCurHistory = pIIR->pIirHistory + tmp * 2;
addq.l #1, d5 ;
btst #1, d5 ;if((j + 1) & 2)
beq .ForInHBeg ;{
;--== This part is executed only if (j + 1)/2 is odd ==--
subq.l #1, d5
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next input sample loading from history buffer==--
move.w d2, d3 ;iCurX3 = iCurX2;
move.w -(a4), d2 ;iCurX2 = iCurX1
swap d2 ;iCurX1 = iCurX0
swap d3 ;iCurX0 = *--pCurHistory;
;--== Next output sample loading from history buffer==--
move.w d0, d1 ;iCurY3 = iCurY2;
move.w -(a4), d0 ;iCurY2 = iCurY1;
swap d0 ;iCurY1 = iCurY0;
swap d1 ;iCurY0 = *--pCurHistory;
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 += iCurA * iCurX0;
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 += iCurA * iCurX1;
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 += iCurA * iCurX2;
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 += iCurA * iCurX3;
mac.w d4.l, d0.u, <<, ACC0 ;iOut0 += iCurB * iCurY0;
mac.w d4.l, d0.l, <<, ACC1 ;iOut1 += iCurB * iCurY1;
mac.w d4.l, d1.u, <<, ACC2 ;iOut2 += iCurB * iCurY2;
mac.w d4.l, d1.l, <<, ACC3 ;iOut3 += iCurB * iCurY3;
subq.l #2, d5 ;j -= 2;
ble .EndInH ;}
;while(j > 0)
;{
.ForInHBeg:
subq.l #1, d5
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
.ForInH:
;--== Three first input samples and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC1 ;iOut1 += iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC2 ;iOut2 += iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC3 ;iOut3 += iCurA * iCurX2
;--== Three first output samples and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC1 ;iOut1 += iCurB * iCurY0
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += iCurB * iCurY1
mac.w d4.l, d1.u, <<, ACC3 ;iOut3 += iCurB * iCurY2
;--== Loading two next input samples ==--
move.l d2, d3 ;iCurX3 = iCurX1; iCurX2 = iCurX0;
move.l -(a4), d2 ;iCurX1 = *--pCurHistory; iCurX0 = *--pCurHistory;
;--== Loading two next output samples ==--
move.l d0, d1 ;iCurY3 = iCurY1; iCurY2 = iCurY0;
move.l -(a4), d0 ;iCurY1 = *--pCurHistory; iCurY0 = *--pCurHistory;
;--== Fourth input sample and coefficient multiplication ==--
mac.w d4.u, d2.l, <<, ACC0 ;iOut0 += iCurA * iCurX0
;--== Fourth output sample and coefficient multiplication ==--
mac.w d4.l, d2.u, <<, ACC0 ;iOut0 += iCurB * iCurY0
;--== Correcting input and output samples loaded from history buffer ==--
swap d2 ;
move.w d2, d4 ;
move.w d0, d2 ;
move.w d4, d0 ;
swap d2 ;
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
;--== Next Four input sample and coefficients multiplication ==--
mac.w d4.u, d2.u, <<, ACC0 ;iOut0 += iCurA * iCurX0
mac.w d4.u, d2.l, <<, ACC1 ;iOut1 += iCurA * iCurX1
mac.w d4.u, d3.u, <<, ACC2 ;iOut2 += iCurA * iCurX2
mac.w d4.u, d3.l, <<, ACC3 ;iOut3 += iCurA * iCurX3
;--== Next Four output sample and coefficients multiplication ==--
mac.w d4.l, d0.u, <<, ACC0 ;iOut0 += iCurB * iCurY0
mac.w d4.l, d0.l, <<, ACC1 ;iOut1 += iCurB * iCurY1
mac.w d4.l, d1.u, <<, ACC2 ;iOut2 += iCurB * iCurY2
mac.w d4.l, d1.l, <<, (a3)+, d4, ACC3 ;iOut3 += iCurB * iCurY3
;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
subq.l #4, d5 ;j -= 4;
bgt .ForInH ;} // while(j > 0)
.EndInH:
bra .EndH ;} // if(pIIR->iIirHistoryCount)
;else{
.BegNoHistory:
;---=Final computations if history buffer is void==--
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
mac.w d4.u, d2.u, <<, ACC1 ;iOut1 += iCurA * iCurX1
mac.w d4.u, d2.l, <<, ACC2 ;iOut2 += iCurA * iCurX2
mac.w d4.u, d3.u, <<, ACC3 ;iOut3 += iCurA * iCurX3
mac.w d4.l, d0.u, <<, ACC1 ;iOut1 += iCurB * iCurY1
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += iCurB * iCurY2
mac.w d4.l, d1.u, <<, ACC3 ;iOut3 += iCurB * iCurY3
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
mac.w d4.u, d2.u, <<, ACC2 ;iOut2 += iCurA * iCurX2
mac.w d4.u, d2.l, <<, ACC3 ;iOut3 += iCurA * iCurX3
mac.w d4.l, d0.u, <<, ACC2 ;iOut2 += iCurB * iCurY2
mac.w d4.l, d0.l, <<, ACC3 ;iOut3 += iCurB * iCurY3
;--== Next coefficients loading ==--
move.l (a3)+, d4 ;iCurA = *pCurCoef++; iCurB = *pCurCoef++;
mac.w d4.u, d2.u, <<, ACC3 ;iOut3 += iCurA * iCurX3
mac.w d4.l, d0.u, <<, ACC3 ;iOut3 += iCurB * iCurY3
;} // else if (pIIR->iIirHistoryCount)
.EndH:
;--==Storing computed samples into the memory==--
move.l a6, a3 ;pCurCoef = pIirCoef
move.l 4(a3), d4 ;pCurB = pIirCoef[2];
move.w 8(a3), d4 ;pCurA = pIirCoef[4];
move.l 12(a3), d5 ;j = pIirCoef[6];
movclr.l ACC0, d0 ;*pY++ = iOut0
;#ifndef __FRACT_M
; swap d0 ;
;#endif
move.w d0, (a1)+ ;
mac.w d4.u, d0.l, <<, ACC1 ;iOut1 += iOut0 * pCurB
movclr.l ACC1, d1 ;
;#ifndef __FRACT_M
; swap d1 ;*pY++ = iOut1
;#endif
move.w d1, (a1)+ ;
mac.w d4.u, d1.l, <<, ACC2 ;iOut2 += iOut1 * pCurA
mac.w d4.l, d0.l, <<, ACC2 ;iOut2 += iOut0 * pCurB
movclr.l ACC2, d2 ;
;#ifndef __FRACT_M
; swap d2 ;*pY++ = iOut2
;#endif
move.w d2, (a1)+ ;
mac.w d4.u, d2.l, <<, ACC3 ;iOut3 += iOut2 * pCurA
mac.w d4.l, d1.l, <<, ACC3 ;iOut3 += iOut1 * pCurB
mac.w d5.u, d0.l, <<, ACC3 ;iOut3 += iOut0 * j
movclr.l ACC3, d0 ;
;#ifndef __FRACT_M
; swap d0 ;*pY++ = iOut3
;#endif
move.w d0, (a1)+ ;
subq.l #8, d7 ;i -= 8;
bgt .BegD4 ;}while(i > 0);
bra .EndD4
.EndD4Z:
move.l d6, d7 ;
beq .EndTailH ;
bra .BegTailH ;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -