📄 least mean square adaptive filter. n-1 inputs and n coefficients.txt
字号:
*===============================================================================
*
* TEXAS INSTRUMENTS, INC.
*
* FIRLMS2
*
* Revision Date: 5/5/97
*
* USAGE This routine is C Callable and can be called as:
*
* Long40 firlms2(short h[], short x[], short b, int n)
*
* h = Coefficient Array
* x = Input Array
* b = Error from previous FIR
* n = Number of coefficients
*
* If routine is not to be used as a C callable function
* then all instructions relating to stack should be removed.
* Refer to comments of individual instructions. You will also
* need to initialize values for all of the values passed as these
* are assumed to be in registers as defined by the calling
* convention of the compiler, (refer to the C compiler reference
* guide).
*
* C Code This is the C equivalent of the Assembly Code without
* restrictions.
*
* Note that the assembly code is hand optimized and restrictions
* may apply
*
* Long40 firlms2(short h[], short x[], short b, int N)
* {
* int i;
* Long40 y = 0;
* for (i = 0; i < N; i++) {
* h[i] += (x[i] * b) >> 16;
* y += x[i + 1] * h[i];
* }
* return y;
* }
*
* DESCRIPTION
* This is a Least Mean Squared Adaptive FIR Filter. Given the
* error from the previous sample and pointer to the next sample
* it computes an update of the coefficients and then performs
* the FIR for the given input. This routine has no memory hits
* regardless of where x and h arrays are placed in memory. This
* routine assumes 16-bit input and output.
*
* TECHNIQUES
* The loop is unrolled once and the number of coefficients must be
* a multiple of 2.
*
* MEMORY NOTE
* This code has no memory hits regardless of where x and h are
* located in memory.
*
* CYCLES 1.5*N + 16
*
*******************************************************************************
;-------------------------------------------------------------------------------
; _firlms2: LMS coefficient update fused with the FIR sum (C-callable entry).
;
; Registers read on entry (per the instruction comments below):
;   A4 = h   B4 = x   A6 = b   B6 = n   B3 = return address   B15 = stack ptr
; Result: the two partial sums are combined into the register pair A5:A4.
; Saved and restored around the body: B10, B11, B12, A10, A11.
;
; The work is split into two streams: the A side handles j=0 (h[0], h[2], ...)
; and the B side handles j=1 (h[1], h[3], ...); each pointer steps by two
; halfwords per kernel pass.
; NOTE(review): the load stages run several passes ahead of the store stage
; (the stores address 8 halfwords behind the h pointers), so h and x appear to
; be read beyond the n elements that are updated -- confirm callers allow this.
;-------------------------------------------------------------------------------
.global _firlms2
.text
_firlms2:
; Prologue: B15 pushes B10/A10/A11 while A8 (a copy of the entry stack pointer)
; pushes B11/B12 into the words in between, so two saves can issue per cycle.
STW .D2 B10, *B15--[2] ; push B10 on the stack
|| MV .L1X B15, A8 ; A8 = copy of stack pointer for the .D1 pushes
*** BEGIN Benchmark Timing ***
B_START
STW .D2 A10, *B15--[2] ; push A10 on the stack
|| STW .D1 B11, *--A8 ; push B11 on the stack
|| MV .L1X B3, A1 ; keep return address in A1 (B3 is reused as an h pointer)
|| MV .L2X A6, B5 ; copy b to the B side
; Issue the first branch to LOOP, set the priming counter, start loading x.
B .S1 LOOP ; for i
|| MVK .S2 4, B1 ; setup priming: B1 = 4 passes before results are kept
|| ADD .L1X 2, B4, A3 ; A3 = x + 2 bytes (second x stream)
|| LDH .D2 *B4++[2], A0 ;**** x0 = *x++, j=1
ADD .L2X A4, 2, B3 ; B3 = h + 2 bytes (second h stream)
|| SHR .S2 B6, 1, B0 ; n / 2
|| STW .D2 A11, *B15 ;push A11 on the stack
|| STW .D1 B12, *--A8[2] ; push B12 on the stack
ADD .S2 1, B0, B0 ; n/2 + 1
|| SUB .L1 A10, A10, A11:A10 ; y = 0 (A-side partial sum)
|| SUB .L2 B9, B9, B9:B8 ; y = 0 (B-side partial sum)
|| LDH .D1 *A3++[2], B2 ;**** x0 = *x++, j=0
|| LDH .D2 *B4++[2], A0 ;**** x0 = *x++, j=1
; Kernel: three execute packets per pass. B0 (= n/2 + 1) gates the branch;
; B1 counts down from 4 and, while non-zero, suppresses the stores and the
; accumulation so that pipeline fill-up values are not committed.
; NOTE(review): the '*' count in each comment appears to mark how many passes
; ahead of the store stage that instruction is working -- confirm against TI's
; software-pipelining notation.
LOOP:
[B0] B .S1 LOOP ;* for i
|| MV .L1X B2, A5 ;* copy x0, j=0
|| MPY .M2X 1, A0, B6 ;* copy x0, j=1 (multiply by 1 used as a cross-path move)
|| SHR .S2 B10, 16, B10 ;* e = f >> 16, j=1
|| MPY .M1 A0, A6, A9 ;** f = x0 * b, j=0
|| LDH .D1 *A4++[2], A2 ;*** h0 = *h++, j=0
|| LDH .D2 *B3++[2], B12 ;*** h0 = *h++, j=1
|| [B1] SUB .L2 B1, 1, B1 ;* priming count
; Write back the updated coefficients (only once priming has finished).
[!B1] STH .D1 A7, *-A4[8] ; h[-1] = h1, j=0
|| [!B1] STH .D2 B7, *-B3[8] ; h[-1] = h1, j=1
|| ADD .S1 A9, A2, A7 ;* h1 = h0 + e, j=0
|| ADD .S2 B10, B12, B7 ;* h1 = h0 + e, j=1
|| MPY .M2 B2, B5, B10 ;** f = x0 * b, j=1
; Accumulate the products, count the pass, and fetch the next x values.
[B0] SUB .S2 B0, 1, B0 ; i++
|| [!B1] ADD .L1 A8, A11:A10,A11:A10 ; y += p, j=0
|| [!B1] ADD .L2 B11, B9:B8, B9:B8 ; y += p, j=1
|| MPY .M1 A5, A7, A8 ;* p = x0 * h1, j=0
|| MPY .M2 B6, B7, B11 ;* p = x0 * h1, j=1
|| SHR .S1 A9, 16, A9 ;** e = f >> 16, j=0
|| LDH .D1 *A3++[2], B2 ;**** x0 = *x++, j=0
|| LDH .D2 *B4++[2], A0 ;**** x0 = *x++, j=1
; end of LOOP
; Epilogue: restore the saved registers while folding the B-side sum into the
; A-side sum. The ADD/ADDU below read A11 and A10 as accumulator words; this
; relies on the LDW results not having landed yet (load delay slots).
LDW .D2 *B15++, A11 ; pop A11 off the stack
|| MV .L2X A1, B3 ; move return address
|| MV .L1X B8, A4 ; A4 = low word of the B-side sum
ADD .L1X A11, B9, A5 ; sum sums (high words)
|| LDW .D2 *B15++, B12 ; pop B12 off the stack
ADDU .L1 A10, A5:A4, A5:A4 ; sum sums (add A-side low word into A5:A4)
|| LDW .D2 *B15++, A10 ; pop A10 off the stack
B_END:
*** END Benchmark Timing ***
LDW .D2 *B15++, B11 ; pop B11 off the stack
|| B .S2 B3 ; return to caller
LDW .D2 *B15, B10 ; pop B10 off the stack
NOP 4 ; fill the remaining branch delay slots
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -