/* fir_cplx_blk.asm (source-viewer page header; "font size" control label) */
/**************************************************************
File Name: fir_cplx_blk.asm
Date Modified: 2/17/99 RFG
7/11/00 PPG
Purpose: Subroutine that implements a Complex Block FIR Filter
given coefficients and samples.
Equation: y(n) = sum over k = 0..M of h(k) * x(n-k)
Each term is the complex product of a sample (A+Bj) and a coefficient (C+Dj):
(A+Bj) * (C+Dj) = (AC-BD) + (AD+BC)j
Calling Parameters:
b0,i0 = address of delay line buffer in normal-word space
l0 = length of delay line buffer
b1,i1 = address of delay line buffer in long-word space
l1 = (length of delay line buffer)/2
b6,i6 = address of input buffer
b8,i8 = address of coefficients buffer
l8 = length of coefficients buffer
b9,i9 = address of output buffer
l9 = length of output buffer
r1 = number of taps in the filter
r2 = number of samples
r3 = number of taps in filter - 2
l6,l9 = 0
m10 = 1
m3 = -1
m1,m9 = 2
m4 = -2
Assumptions:
All arrays must start on even address boundaries.
All arrays must contain an even number of 32-bit words (zero-pad
if necessary).
The output buffer should be placed in long-word space.
The output buffer should have 1 additional location allocated
for a dummy write that allows optimization.
Return Values:
i9 points to the OUTPUT
Registers Affected:
f0,f1,f4,f8,f11,f12,f13
i0,i1,i6,i8,i9
Cycle Count:
8 + taps + samples(10 + (taps-2)*2) + 9 cache misses
Number of PM Locations:
21 instruction words
2 * Number of taps locations for coefficients
Number of samples + 1 locations for the output buffer
Number of DM Locations:
Number of taps locations for the delay line buffer
Number of samples locations for the input buffer
**************************************************************/
#include "def21160.h"
.global fir_cplx_blk;
/* program memory code */
.section/pm seg_pmco;
fir_cplx_blk:
/* Entry point of the complex block FIR. The caller is expected to have set
   up the buffers, lengths and modifiers listed in the file header
   (r1 = taps, r2 = samples, r3 = taps - 2; m1, m3, m4, m9, m10 preloaded).
   In the register comments below, Fn is the PEx register and SFn its PEy
   counterpart; A+Bj is a sample and C+Dj a coefficient. */

/* Circular Buffer Enable, SIMD Mode enable */
bit set MODE1 CBUFEN | PEYEN;
/* NOTE(review): presumably this nop covers the latency of the MODE1 write so
   that the following instructions already run in SIMD/circular mode --
   confirm against the ADSP-21160 hardware reference. */
nop;
f11=0.0; /* Zero constant: stored into the delay line and used to reset the accumulators */

/* Clear the delay line: r1 stores of f11 through i0, post-modified by m1. */
lcntr = r1, do clear_fir until lce;
clear_fir: dm(i0,m1) = f11;

/* Sample loop: r2 passes, one per input sample. The loop is software
   pipelined: its first two instructions finish and store the result of the
   PREVIOUS pass while fetching the next input sample. On the first pass
   f12/f13 have not been computed yet, so that first store appears to be the
   "dummy write" for which the header reserves one extra output location. */
lcntr = r2, do fir until lce; /* outer loop - sample loop */
f13=f0+f13, f0=dm(i6,m1) (LW); /* SF13=SF0+SF13 = Sum AD + Sum BC (imaginary part of the previous pass) */
/* read A,B from INPUT[i] (F0=A, F1=B) */
dm(i1,m3)=f0, pm(i9,m10)=f12; /* transfer A,B to delayline, OUTPUT[N]=SF12,SF13 */

/* Pipeline fill: load the first sample/coefficient pair, reset the four
   partial sums from f11, and start the first multiplies. Each tap takes two
   multiply instructions with a register swap (r0<->s0) in between, so that
   all four products AC, BD, BC, AD are formed across the two elements. */
f12=pass f11, f0=dm(i0,m1), f4=pm(i8,m9); /* Zero accumulators F12/SF12, F0=A, SF0=B, F4=C, SF4=D */
f8=f0*f4, f13=abs f11, r0<->s0; /* F8=AC, SF8=BD, Zero accumulators F13/SF13, F0=B, SF0=A */
f8=f0*f4, f12=f8+f12, f0=dm(i0,m1), f4=pm(i8,m9); /* F8=BC, SF8=AD, F12=Sum AC, SF12=Sum BD */
/* F0=A, SF0=B, F4=C, SF4=D */

/* Inner multiply-accumulate loop: r3 passes (taps - 2 per the header), two
   instructions per pass. The first and last taps are handled by the fill
   code above and the drain code below.
   NOTE(review): with r3 = taps - 2 the routine presumably requires at least
   3 taps; the behaviour for a zero loop count should be confirmed. */
lcntr=r3, do macs until lce; /* FIR loop */
f8=f0*f4, f13=f8+f13, r0<->s0; /* F8=AC, SF8=BD, F13=Sum BC, SF13=Sum AD, F0=B, SF0=A */
macs: f8=f0*f4, f12=f8+f12, f0=dm(i0,m1), f4=pm(i8,m9); /* F8=BC, SF8=AD, F12=Sum AC, SF12=Sum BD */
/* F0=A, SF0=B, F4=C, SF4=D */

/* Pipeline drain: products of the last loaded pair are formed and
   accumulated; no further coefficient is fetched. The read through i0 with
   m4 is there to step the delay-line pointer back. */
f8=f0*f4, f13=f8+f13, r0<->s0; /* F8=AC, SF8=BD, F13=Sum BC, SF13=Sum AD, F0=B, SF0=A */
f8=f0*f4, f12=f8+f12, f0=dm(i0,m4); /* F8=BC, SF8=AD, F12=Sum AC, SF12=Sum BD, decrement pointer to delayline */

/* Combine the partial sums. The PEx-side sums are copied across into SF0 so
   that the final real and imaginary parts are formed on the PEy side (SF12
   and SF13). The imaginary add is deferred to the first instruction of the
   next pass, or to the rts instruction after the last pass. */
f13=f8+f13, s0=r12; /* F13=Sum BC, SF0=F12=Sum AC */
fir: f12=f0-f12, s0=r13; /* SF12=SF0-SF12=Sum AC-Sum BD, SF0=F13=Sum BC */

/* Delayed return: the two instructions after rts are still executed before
   control goes back to the caller.
   NOTE(review): the final store follows the MODE1 clear; presumably the mode
   change has not taken effect yet, so the store still runs in SIMD/circular
   mode like the stores inside the loop -- confirm against the hardware
   reference. */
rts (db), f13=f0+f13; /* SF13=SF0+SF13 = Sum AD + Sum BC */
bit clr MODE1 CBUFEN | PEYEN; /* Circular Buffer disable, SIMD disable */
pm(i9,m10)=f12; /* OUTPUT[N]=SF12,SF13 (result of the last sample) */
fir_cplx_blk.end:
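/* Source-viewer page residue (not part of the program) - keyboard shortcuts:
   Copy code: Ctrl + C
   Search code: Ctrl + F
   Full-screen mode: F11
   Toggle theme: Ctrl + Shift + D
   Show shortcuts: ?
   Increase font size: Ctrl + =
   Decrease font size: Ctrl + - */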