📄 fir_interp_gen.asm
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : fir_interp_gen.asm
Label name : __fir_interp_gen
Version : 1.3
Change History :
Version Date Author Comments
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 03/27/2002 Nishanth Modified to match
silicon cycle count
1.0 06/02/2001 Nishanth Original
Description : This function performs FIR based Interpolation Filter. The
function produces the filtered interpolated output for a given
input data. The characteristics of the filter are dependant on
the coefficient values,the number of taps(L) and interpolation
factor(M) supplied by the calling program.
The coefficients stored in array `h` are applied to the
elements of vector `x[]`.
For filtering, 40 bit accumulator is used. The most
significant 16 bits of the result is stored in the output
vetor `y[ ]`computed according to a interpolation index `M`.
Coefficients are to be stored as polyphases.
The implementation of a interpolator is demonstrated in the
program.
The implementation provided below does not use a delay line
once it does not require samples older than x(0).
This has been done since delay line will not be correct for
polyphases greater than 1.
This implementation is divided into two stages and there is an
outer loop which is done no: of polyphases(M) times.
a) In the first stage, it finds the output samples which
require delay line, i.e. for the first L/M-1 output samples
y(0) = h(0) * x(0) + h(M) * x(-1) + .. + h(L-M) * x(- (L/M-1))
y(1) = h(1) * x(0) + h(M+1) * x(-1) + ...+ h(L-M+1)
* x(- (L/M-1))
...
y(M-1) = h(M-1) * x(0) + h(2*M-1) * x(-1) + ... h(L-1)
* x(- (L/M-1))
y(M) = h(0) * x(1) + h(M) * x(0) + ... + h(L-M+1)
* x(- (L/M-2))
....
This stage has been separated out due to the use of delay
line. There are two inner loops. One finds sum of terms
containing inputs present in delay line and the other, ones in
input buffer.
b) In the second stage, all the remaining output samples are
calculated. i.e. y((L/M)-1) to y(Nout - 1) are computed in
stage 2.
c) After filtering the input, the delay line is updated by the
last L/M-1 input samples.
d) Two output samples are computed simultaneously using the 2
MACs. For finding rest of output samples corresponding to each
input, there is an outer loop which is done M/2 times.
Assumptions : 1. This routine assumes that the number of filter
coefficients(L) is a multiple of interpolation factor(M)
since filtering of each sample requires L/M terms at the
maximum and is done assuming there are L/M terms.
2. L is assumed to be greater than M. Otherwise L/M - 1 = 0
and hence stage 1 need not be done but will be done once.
3. L/M must be atleast 3.
Prototype : void fir_interp_gen(const fract16 x[], fract16 y[], short Ni,
fract16 h[], int L, int M, int LBYM, fract16 d[]);
x[] - input array
y[] - output array
Ni - Number of input samples
h[] - Filter coefficient array
L - No. of coefficients
M - Interpolation Factor
LBYM - Number of coefficients in each polyphase(L/M)
d[] - Delay line buffer
Registers used : A0, R0-R3, R5-R7, I0-I3, B0-B3, M0, M1, M3, L0, L1, P0-P5,
LC0, LC1.
Performance :
Code Size : 266 Bytes
Cycle count : 5810 Cycles (For Ni=256, L=16 and M=2)
*******************************************************************************/
.section L1_code;
.global __fir_interp_gen;
.align 8;
__fir_interp_gen:
[--SP]=(R7:5,P5:3); // Push R7-R5 and P5-P3
R3 = [SP+36]; // Address of filter coefficients
R7 = [SP+40]; // Number of Coefficients (L)
R6 = [SP+44]; // Interpolation Factor (M)
P1 = [SP+48]; // (L/M) (INNER LOOP counter)
B0 = R0; // Input array circular buffer
I0 = R0; // Start of input array
R0 = R6 + R6(S) || P5=[SP+52];
// R6 = 2*M and Address of delay line buffer into P5
M1 = R0; // M1 = 2*M
R0 = -R0;
M3 = R0; // M3 = -2*M
B1 = P5; // Start of delay line(circular buffer)
I1 = P5;
R5 = P1; // R5 = L/M
R0 = R2 + R2(s); // R0 = 2*Ni
P2 = R2; // P2 = Ni
B3 = R1; // Output buffer is circular buffer
I3 = R1;
P5 = P1; // P5 = L/M
P3 = R6; // P3 = M
P5 += -1;
L0 = R0; // Length of circular buffer = 2*Ni
R5 = R5 + R5(s); // 2*L/M (USED TO MODIFY BASE OF COEFFICIENT
// BUFFFER)
R0.L = R0.L * R6.L(IS);
L3 = R0; // Length of circular buffer = No = (Ni*M)
L1 = R5; // Length of delay circular buffer = 2*L/M
L2 = R5; // Length of circular buffer = 2*L/M
R6 = 2(Z);
P2 -= P5; // P2 = Ni - (L/M - 1) (LOOP3 COUNTER)
I3 += M3; // Because I3+=M1 is done at the start of loop
R2 = R2 - R2(S) || I3 -= 2 || R1.L = W[I0++];
// Point to end because of one dummy write at start
// I0 += 2(BECAUSE I0-=2 IS DONE AT START OF LOOP)
P1 += -2;
FIR_INT_LOOP1:
I2 = R3; // Address of polyphase
B2 = R3; // Coefficien array is circular buffered
P0 = P5; // Loop counter for Loop 2a(using delay line) =
// L/M - 1
P4 = 0; // Loop counter for Loop 2b(using input buffer) = 1
W[I3++] = R7.L|| I2-=2;
// Modify I2, Store result produced by last input of
// the two polyphases
R2.L = R6.L + R6.H(S) || I0 -= 2 || R1.L = W[I2--];
// Modifier for input buffer and delay line is
// initialized to 2
// Input pointer is modified
// Last coefficient of the first polyphase is
// fetched to R1.L
M0 = R2; // Modify the modifier
LSETUP(FIR_INT_LOOP2_ST,FIR_INT_LOOP2_END) LC0 = P5;
// Execute loop L/M - 1 times
FIR_INT_LOOP2_ST:
A0=0 || R0.L = W[I1++] || R0.H = W[I0++];
// x(-(L/M-1)) is fetched to R0.L from delay line
// x(0) is fetched from input buffer
P4 += 1; // Increment input buffer counter
LSETUP(FIR_INT_LOOP2A,FIR_INT_LOOP2A) LC1 = P0;
P0 += -1; // Decrement delay line counter
FIR_INT_LOOP2A:
A0+=R0.L*R1.L || R0.L = W[I1++] || R1.L = W[I2--];
// Find sum of terms having samples from delay line
// Fetch delay line samples to R0.L and coefficients
// to R1.L
R2 = R2 + R6(S) || W[I3] = R7.L || I1 += M0;
// Modify copy of modifiers
// Store previous result and adjust delay line
// pointer
M0 = R2; // Modify the modifier
LSETUP(FIR_INT_LOOP2B,FIR_INT_LOOP2B) LC1 = P4;
FIR_INT_LOOP2B:
R7.L=(A0+=R0.H*R1.L) || R0.H = W[I0++] || R1.L = W[I2--];
// Find sum of terms having samples from input
// buffer
// Fetch input samples to R0.H and coefficients to
// R1.L
FIR_INT_LOOP2_END:
DISALGNEXCPT || I0 -= M0 || R0 = [I3++M1];
// Update input buffer pointer, output pointer
R3 = R3 + R5(S) || W[I3] = R7.L;
// Modify copy of base address of coefficient buffer
// to point to next polyphase
// Store the output
LSETUP(FIR_INT_LOOP3_ST,FIR_INT_LOOP3_END) LC0 = P2;
// Ni - L/M + 1
FIR_INT_LOOP3_ST:
// L/M times
A0=0 || R0.L = W[I0++] || I3+=M1;
// Fetch input sample to R0.L and modify output
// pointer
R7.L=(A0+=R0.L*R1.L) || R0.L = W[I0++] || R1.L = W[I2--];
R7.L=(A0+=R0.L*R1.L) || R0.L = W[I0++] || R1.L = W[I2--];
LSETUP(FIR_INT_LOOP3A,FIR_INT_LOOP3A) LC1= P1;
FIR_INT_LOOP3A:
R7.L=(A0+=R0.L*R1.L) || R0.L = W[I0++] || R1.L = W[I2--];
// Find interpolated output and Fetch input samples
// and coefficients
FIR_INT_LOOP3_END:
I0 -= M0 || W[I3] = R7.L;
// Modify input buffer pointer and store previous
// result
P3 += -1; // Modify outer loop counter for number of
// polyphases
R0.L = W[I1++] || I0 += M0;
// Modify delay line pointer and input buffer
// pointer
CC = P3 == 0;
IF !CC JUMP FIR_INT_LOOP1(BP);
// M times
I0-=4; // Input buffer pointer
R0.L=W[I0--] || I1-=4;
// Modify delay line pointer and Fetch last sample
LSETUP(FIR_INTERP_GEN_DELUPDATE,FIR_INTERP_GEN_DELUPDATE) LC0 = P5;
// L/M-1
FIR_INTERP_GEN_DELUPDATE:
W[I1--]=R0.L || R0.L=W[I0--];
// Updation of delay line
(R7:5,P5:3) = [SP++]; // Pop R7-R5 and P5-P3
RTS;
NOP; //to avoid one stall if LINK or UNLINK happens to be
//the next instruction after RTS in the memory.
__fir_interp_gen.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -