📄 auto-regressive moving-average (arma) filter.txt
字号:
*TEXAS INSTRUMENTS, INC.
*
* IIR
*
* Revision Date: 05/13/97
*
* USAGE This routine is C Callable and can be called as:
*
* void iir(short *oPtr, short *iPtr, short *inPtr, short *b,
* short *a, int M)
*
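* oPtr = output history/work array: oPtr[0..3] hold the four previous
*        outputs on entry; the M new outputs are written to oPtr[4..M+3]
* iPtr = input array: iPtr[0..3] are the four previous inputs and
*        iPtr[4..M+3] are the M new input samples
* inPtr = second output array: receives a copy of the M new outputs
* a = auto-regressive (feedback) filter coefs; a[1..4] are used, a[0] is not
* b = moving-average (feed-forward) filter coefs b[0..4]
* M = length (number of output samples)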
*
* If routine is not to be used as a C callable function
* then all instructions relating to stack should be removed.
* Refer to comments of individual instructions. You will also
* need to initialize values for all of the values passed as these
* are assumed to be in registers as defined by the calling
* convention of the compiler, (refer to the C compiler reference
* guide).
*
* C CODE This is the C equivalent of the assembly code without
* restrictions. Note that the assembly code is hand optimized and
* restrictions may apply.
*
* void iir(short *oPtr, short *iPtr, short *inPtr, short *b,
* short *a, int M)
*
* {
* int j,i;
* int sum;
*
* for (i=0; i<M; i++){
* sum = b[0] * iPtr[4+i];
* for (j = 1; j <= 4; j++)
* sum += b[j]*iPtr[4+i-j]-a[j]*oPtr[4+i-j];
* oPtr[4+i] = (sum >> 15);
* inPtr[i] = oPtr[4+i];
* }
* }
*
*
* DESCRIPTION
* The iir performs an Auto-regressive moving-average (ARMA) filter
* with 4 auto-regressive filter coefficients and 5 moving-average
* filter coefficients for M output samples. The output vector is
* stored in two locations. This routine is used as a high pass
* filter in the VSELP vocoder. All data is assumed to be 16-bit.
* To avoid memory hits oPtr must be aligned on the next halfword
* boundary following the alignment of iPtr.
*
* TECHNIQUES
* The inner loop is completely unrolled and software pipelined
* (i.e. each time the 5 cycle loop "LOOP" is executed the inner
* loop of the C code is executed.)
*
* MEMORY NOTE
* To avoid memory hits oPtr must be aligned on the next halfword
* boundary following the alignment of iPtr. Otherwise there is a
* total of M memory hits (one per outer-loop iteration).
*
* CYCLES M*5 + 16
* for M = 160 -> 816 cycles or 4.08 usec
*
*===============================================================================
* ----------------------------------------------------------------------------
* _iir -- C-callable ARMA filter kernel, hand-scheduled for the TMS320C6x.
*
* Register arguments, as consumed below:
*     A4 = oPtr    B4 = iPtr    A6 = inPtr    B6 = b    A8 = a    B8 = M
* The return address is taken from B3.  A10, A11, B10 and B11 are saved on
* the stack (B15) on entry and restored on exit.
*
* Working registers once setup is complete:
*     A7 = a[2]:a[1]   A5 = a[4]:a[3]   (upper:lower 16 bits)
*     B9 = b[2]:b[1]   B5 = b[4]:b[3]   B10 = b[0]
*     A1 = passes still to branch for (starts at M)
*     B2 = 0 during the first (priming) pass through LOOP, 1 afterwards
*     B1 = 1 until the second pass, then 0; gates the stores to inPtr
*
* Scheduling notes:
*   - The code depends on the C6x delay slots: a value loaded by LDH is first
*     usable five packets after issue, an MPY result two packets after issue.
*     Several registers (A5, B5, A9, B0, B7, A8) are deliberately reused
*     while an earlier load/multiply into them is still in flight, so the
*     order of the execute packets must not be changed.
*   - LOOP is executed M+1 times for M >= 1: one priming pass plus one pass
*     per output sample.  Index names in the loop comments (iPtr[4], oPtr[0],
*     ...) are relative to the output sample being formed, as in the C code.
*   - Comment markers "*" / "**" tag work belonging to later pipeline stages
*     (kept from the original listing).
*   - M == 0: the priming pass still runs (so the history/coefficients are
*     still read), but every store is predicated off and nothing is written.
*   - NOTE(review): a negative M is not treated as "no samples"; A1 is only
*     tested for non-zero.
* ----------------------------------------------------------------------------
        .global _iir
        .text
_iir:
        STW     .D2     A10,*B15--          ; save A10 at the entry SP, SP -= 1 word
||      MV      .L1X    B15,A1              ; A1 = entry SP, used as a second push pointer

        STW     .D1     A11,*--A1[2]        ; save A11 two words below the entry SP
||      STW     .D2     B10,*B15--[2]       ; save B10 one word below the entry SP, SP -= 2 words

        STW     .D2     B11,*B15            ; save B11 at SP; SP is not moved past it
                                            ; NOTE(review): B15 now points at the word holding B11 --
                                            ; confirm this is safe if interrupts can use this stack

*** BEGIN Benchmark Timing ***
B_START:
        LDH     .D2     *B6,B10             ; B10 = b[0]

        LDH     .D1     *+A8[2],A7          ; A7 = a[2]
||      LDH     .D2     *+B6[2],B9          ; B9 = b[2]
||      ADD     .L2     8,B4,B4             ; B4 -> iPtr[4] (4 halfwords = 8 bytes)

        LDHU    .D1     *+A8[1],A5          ; A5 = a[1], zero-extended so it can be OR-packed
||      LDHU    .D2     *+B6[1],B5          ; B5 = b[1], zero-extended so it can be OR-packed
||      MV      .S1X    B8,A1               ; A1 = M (loop counter); B8 is read over the cross path

        LDH     .D1     *+A8[4],A5          ; A5 = a[4]; arrives one cycle after a[1] has been used
||      LDH     .D2     *+B6[4],B5          ; B5 = b[4]; arrives one cycle after b[1] has been used

        LDHU    .D1     *+A8[3],A8          ; A8 = a[3], zero-extended (the 'a' pointer is done with)
||      LDHU    .D2     *+B6[3],B8          ; B8 = b[3], zero-extended (M was already copied to A1)

        LDH     .D2     *B4--,B0            ; B0 = iPtr[4]
||      LDH     .D1     *A4++[2],A0         ; A0 = oPtr[0]
||      ADD     .L1     8,A4,A11            ; A11 = oPtr + 8 bytes ("store ptr" in the original);
                                            ; NOTE(review): A11 is not read again in this routine
||      MVK     .S2     1,B1                ; B1 = 1: hold off inPtr stores while priming

        LDH     .D2     *B4--,B0            ; B0 = iPtr[3]
||      LDH     .D1     *A4--,A9            ; A9 = oPtr[2]
||      SHL     .S1     A7,16,A7            ; move a[2] into the upper 16 bits
||      SHL     .S2     B9,16,B9            ; move b[2] into the upper 16 bits

        LDH     .D2     *B4--,B0            ; B0 = iPtr[2]
||      LDH     .D1     *A4++[2],A0         ; A0 = oPtr[1]
||      OR      .L1     A7,A5,A7            ; A7 = a[2]:a[1]
||      OR      .L2     B9,B5,B9            ; B9 = b[2]:b[1]

        LDH     .D2     *B4--,B0            ; B0 = iPtr[1]
||      LDH     .D1     *A4--[2],A9         ; A9 = oPtr[3]
||      SHL     .S1     A5,16,A5            ; move a[4] into the upper 16 bits
||      SHL     .S2     B5,16,B5            ; move b[4] into the upper 16 bits

        LDH     .D2     *B4++[5],B0         ; B0 = iPtr[0]; B4 steps on to the next sample's window
||      OR      .L1     A5,A8,A5            ; A5 = a[4]:a[3]
||      OR      .L2     B5,B8,B5            ; B5 = b[4]:b[3]
||      MVK     .S2     0,B2                ; B2 = 0 marks the priming pass
|| [A1] B       .S1     LOOP                ; taken when M != 0; the first pass through LOOP
                                            ; fills this branch's delay slots

LOOP:
   [B2] ADD     .S1     A2,A10,A2           ; AR sum: o2*a2 + o0*a4 + o1*a3
|| [B2] ADD     .L2     B11,B7,B11          ; MA sum: i4*b0 + i3*b1 + i2*b2 + i1*b3
||      MPY     .M2     B0,B10,B8           ;* B8  = iPtr[4] * b[0]
||      MPYLH   .M1     A0,A5,A8            ;* A8  = oPtr[0] * a[4]
||      LDH     .D2     *B4--,B0            ;** fetch next iPtr[4]
||      LDH     .D1     *A4++,A0            ;** fetch next oPtr[0]

   [B2] ADD     .S1     A2,A8,A2            ; AR sum complete: + o3*a1 (A8 from the previous pass)
|| [B2] ADD     .S2     B11,B7,B11          ; MA sum complete: + i0*b4
||      MPY     .M2     B0,B9,B7            ;* B7  = iPtr[3] * b[1]
||      MPYLH   .M1     A9,A7,A3            ;* A3  = oPtr[2] * a[2]
||      LDH     .D2     *B4--,B0            ;** fetch next iPtr[3]
||[!B2] LDH     .D1     *+A4[1],A9          ;* priming pass only: fetch oPtr[2] from memory;
                                            ;  later passes reuse the output left in A9
|| [A1] ADD     .L1     -1,A1,A1            ; count down, holding at 0 once reached

   [B2] SUB     .L1X    B11,A2,A2           ; sum = MA sum - AR sum (this is oPtr[4] before scaling)
||      MPYLH   .M2     B0,B9,B7            ;* B7  = iPtr[2] * b[2]
||      MPY     .M1     A0,A5,A10           ;* A10 = oPtr[1] * a[3]
||      LDH     .D2     *B4--,B0            ;** fetch next iPtr[2]
||      LDH     .D1     *A4,A0              ;** fetch next oPtr[1]

   [B2] SHR     .S1     A2,15,A9            ; A9 = sum >> 15 = new output sample
||[!B1] STH     .D1     A9,*A6++            ; copy the PREVIOUS pass's output to inPtr
                                            ; (A9 is read here before the SHR above updates it)
||      MPY     .M2     B0,B5,B7            ;* B7  = iPtr[1] * b[3]
||      ADD     .L2     B8,B7,B11           ;* MA sum restarts: i4*b0 + i3*b1
||      LDH     .D2     *B4--,B0            ;** fetch next iPtr[1]
|| [B1] SUB     .S2     B1,B2,B1            ; B1 stays 1 while B2 == 0, drops to 0 on the next pass

   [B2] STH     .D1     A9,*+A4[1]          ; write the new output back into the oPtr array
||      MPYLH   .M2     B0,B5,B7            ;* B7  = iPtr[0] * b[4]
||      MPY     .M1     A7,A9,A8            ;* A8  = oPtr[3] * a[1] (A9 = sample just formed)
||      ADD     .L1     A3,A8,A2            ;* AR sum restarts: o2*a2 + o0*a4 (A8 still holds the
                                            ;  o0*a4 product; the MPY alongside lands later)
||      ADD     .S2     B11,B7,B11          ;* MA sum: + i2*b2
||      LDH     .D2     *B4++[5],B0         ;** fetch next iPtr[0]; B4 steps on one sample
|| [A1] B       .S1     LOOP                ; for (i=0; i<M; i++): branch again while A1 != 0
||[!B2] ADD     .L2     1,B2,B2             ; B2 = 1 once the priming pass is finished
                                            ; LOOP ends here

  [!B1] STH     .D1     A9,*A6              ; copy the final output to inPtr.  B1 is still 1 only
                                            ; when LOOP ran just its priming pass (M == 0); in
                                            ; that case nothing was computed, so nothing is stored
||      MV      .L1X    B15,A1              ; A1 = SP, pop pointer for the A-side registers
B_END:
*** END Benchmark Timing ***
        LDW     .D2     *B15++[2],B11       ; restore B11, SP += 2 words
||      LDW     .D1     *++A1,A11           ; restore A11
||      B       .S2     B3                  ; return; the packets below fill its delay slots

        LDW     .D2     *B15++,B10          ; restore B10, SP back to its entry value
||      LDW     .D1     *++A1[2],A10        ; restore A10

        NOP             4                   ; remaining branch delay slots; the loads complete here
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -