📄 iir_df1.asm
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : iir_df1.asm
Label Name : __iir_df1
Version : 1.3
Change History :
Version Date Author Comments
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 03/17/2002 Manoj Modified to match
silicon cycle count
1.0 03/12/2001 Manoj Original
Description : This program implements a biquad, DF1 form, IIR filter for
1.15 format data and coefficients. The coefficient buffer that
is passed should be in the order b2,b1,b0,a2,a1,Bb2,Bb1,Bb0,
Aa2,Aa1.......... The value of a0 is unity. The first two
elements of the delay line are x(-2) and x(-1). The rest of
delay line buffer must be ordered as y(n-2), y(n-1) for each
stage. This program provides valid output only if the number
of biquad stages is greater than one (only for filter order
greater than 2). The equation implemented is:
y(n) = b0 * x(n) + b1 * x(n-1) + b2 * x(n-2) - a1 * y(n-1) -
a2 * y(n-2)
Note : The coefficients b's and a's generated using MATLAB can
be used as it is. However, the 'a' coefficients have to be
negated in some cases where the coefficient generation
software by itself gives negative 'a' coefficients.
Prototype : void iir_df1(const fract16 x[],fract16 y[],int n,
iir_state_fr16 *s);
x[] - input array
y[] - output array
n - number of input samples(even)
s - Structure of type iir_state_fr16:
typedef struct iir_state_fr16
{
fract16 *c,
// coefficients
fract16 *d,
// start of delay line
int k // no. of bi-quad stages
} iir_state_fr16;
Registers used : A0, A1, R0-R3, R6, R7, I0-I3, B1-B3, L0-L3, P0-P2, P5, LC0,
LC1.
Performance :
Code size : 224 Bytes
If Number of Samples= Ni & number of biquad stages = B
Kernel Cycle Count : (Ni/2){4 + ((B-1)*6) + 4}
: 3*Ni*B + Ni/2
Cycle Count : 457 Cycles (For Ni=64 & B=2)
*******************************************************************************/
/*Input buffer(in) , Output buffer(out), Delay line Buffer(delay) and filter
coefficient buffer(h) are all aligned to 4 byte(word) boundary.
*/
.section L1_code;
.global __iir_df1;
.align 8;
/*
 * __iir_df1 - cascaded biquad (direct form 1) IIR filter for 1.15 data,
 * producing two output samples per pass of the outer loop.
 *
 * Inputs, as read by the code below:
 *   R0       address of the input samples x[]
 *   R1       address of the output samples y[]
 *   R2       n, the sample count; the outer loop runs (n+1)>>1 times and
 *            each pass stores one 32-bit word (two samples)
 *   [SP+12]  address of the state block: three consecutive 32-bit words
 *            holding the coefficient pointer c, the delay-line pointer d
 *            and the number of biquad stages k
 *
 * Register roles inside the loops:
 *   I0  input pointer, linear (L0 = 0)
 *   I1  delay-line pointer, circular over the 4*k bytes starting at d+4
 *   I2  coefficient pointer, circular over the 10*k bytes starting at c
 *   I3  output pointer, circular over 4*((n+1)>>1) bytes starting at y
 *   P5  d (start of the delay line, where x(-2),x(-1) are kept)
 *   P0  outer loop count, P1 = k-1 inner loop count
 *   R6  newest output pair of the last stage (low half = even sample)
 *
 * Comment notation: "acc0"/"acc1" are the accumulators A0/A1 (even/odd
 * sample). b2,b1,b0,a2,a1 are the coefficients of the stage being finished,
 * B2,B1,B0,A2,A1 those of the following stage. X = stage-1 input,
 * Y = output of the stage being finished, Z = output of the last stage.
 *
 * Limits: the inner loop counter is loaded with k-1 and the outer one with
 * (n+1)>>1; no check is made for either being zero (the file header states
 * that k must be greater than one).
 *
 * Differences from the earlier revision of this routine (these add a few
 * instructions, so the size/cycle figures in the file header are slightly
 * low):
 *   - I3 is stepped back only after L3 has been loaded, so the step wraps
 *     to the last output slot instead of landing 4 bytes below y[].
 *   - The I1 step-back and the load of R6 are issued as two instructions so
 *     the load uses the wrapped pointer (the last stage's saved outputs).
 *   - L1-L3 are cleared before returning; the VisualDSP++ run-time model
 *     expects the length registers to be zero on return to compiled code.
 */
__iir_df1:
    P0=[SP+12];                 // P0 = address of the state block (read before
                                // the push below changes SP)
    R2+=1;                      // R2 = n + 1
    [--SP]=(R7:6,P5:5);         // save R6, R7 and P5
    P2=[P0++];                  // P2 = c, coefficient array
    P5=[P0++];                  // P5 = d, delay line
    R2 = R2 >> 1 || P1=[P0++];  // R2 = (n+1)>>1 ; P1 = k, number of stages
    I0=R0;                      // I0 -> input samples
    I2=P2;                      // I2 -> coefficients
    I3=R1;                      // I3 -> output samples
    I1=P5;                      // I1 -> d (the x(-2),x(-1) word)
    P5 += 4;                    // P5 -> d+4, first stage's saved outputs
    R0=P1 ;                     // R0 = k, multiplier operand below
    R3=10 (Z);                  // 10 bytes of coefficients per stage
    B3=R1;                      // circular base: output buffer
    B2=P2;                      // circular base: coefficient array
    B1=P5;                      // circular base: d+4 (the stage outputs only)
    P0=R2;                      // outer loop count = (n+1)>>1
    P2=P1<<2;                   // 4*k = bytes of saved stage outputs
    R2 = R2 << 2;               // 4*((n+1)>>1) = bytes of output written
    R3.L=R3.L*R0.L(IS) || R0 = [I1++]|| R7=[I2] ;
                                // R3 = 10*k. R0 = x(-2) (low), x(-1) (high);
                                // I1 now at d+4. R7 = b2 (low), b1 (high).
    L2=R3;                      // coefficient buffer length  = 10*k
    L3=R2;                      // output buffer length       = 4*((n+1)>>1)
    L1=P2;                      // stage-output buffer length = 4*k
    L0=0;                       // input is addressed linearly
    I3 -= 4;                    // with L3 set, wraps I3 to the last output
                                // word; the first in-loop store to [I3] is a
                                // dummy that the final store overwrites
    I1 -= 4;                    // with L1 set, wraps I1 to the last stage's
                                // saved outputs
    R6 = [I1];                  // R6 = last stage's saved outputs, so that the
                                // first [I1++]=R6 in the loop rewrites the
                                // value already there
    R3 = [P5--] || R2=[I2++] ;  // P5 back to d (the word loaded into R3 is not
                                // used). R2 = b2 (low), b1 (high); I2 -> b0.
    P1+= -1;                    // inner loop count = k - 1
    LSETUP (START_B,END_B) LC0=P0;
                                // outer loop: (n+1)>>1 passes
    A0=R7.H*R0.H || R7.L=W[I2];
                                // acc0 = b1*X(-1). R7.L = b0.
START_B:
    A1=R2.L*R0.H, A0+=R2.L*R0.L || R0=[I0] || [I1++]=R6 ;
                                // acc1 = b2*X(-1), acc0 += b2*X(-2).
                                // R0 = X0 (low), X1 (high); I0 not advanced.
                                // Store the last stage's newest pair to its
                                // delay-line slot; I1 wraps to the first stage.
    A1+=R7.H*R0.L, A0+=R7.L*R0.L || R0.L=W[I1++] || R2=[I2++];
                                // acc1 += b1*X0, acc0 += b0*X0.
                                // R0.L = Y(-2). R2 = b0 (low), a2 (high).
    LSETUP (BSTART,BEND) LC1=P1;
                                // inner loop: k-1 passes, each finishing one
                                // stage and starting the next
BSTART: A1+=R2.L*R0.H, A0-=R2.H*R0.L || R0.H=W[I1--] || R2.L=W[I2++];
                                // acc1 += b0*X1, acc0 -= a2*Y(-2).
                                // R0.H = Y(-1). R2.L = a1.
    A1-=R2.H*R0.H, R0.L=(A0-=R2.L*R0.H)|| R1=[I1] || R3.H=W[I2++];
                                // acc1 -= a2*Y(-1); Y0 = (acc0 -= a1*Y(-1)),
                                // kept in R0.L. R1 = Y(-2) (low), Y(-1) (high).
                                // R3.H = B2 of the next stage.
    A0=R3.H*R1.L || [I3]=R6 || R2.H=W[I2++];
                                // acc0 = B2*Y(-2). Store the previous output
                                // pair at [I3] (pointer not advanced here).
                                // R2.H = B1.
    R0.H=(A1-=R2.L*R0.L), A0+=R2.H*R0.H || R2.L=W[I2++];
                                // Y1 = (acc1 -= a1*Y0), kept in R0.H;
                                // acc0 += B1*Y(-1). R2.L = B0.
    A1=R3.H*R1.H || [I1++]=R0;
                                // acc1 = B2*Y(-1). Replace Y(-2),Y(-1) in the
                                // delay line with Y0,Y1.
BEND: A1+=R2.H*R0.L, A0+=R2.L*R0.L || R0.L=W[I1++] || R2.H=W[I2++] ;
                                // acc1 += B1*Y0, acc0 += B0*Y0.
                                // R0.L = next stage's (-2) output. R2.H = A2.
    A1+=R2.L*R0.H, A0-=R2.H*R0.L || R3.H=W[I1--] || R2.L=W[I2++] ;
                                // acc1 += B0*Y1, acc0 -= A2*Z(-2).
                                // R3.H = Z(-1). R2.L = A1 (I2 wraps to c).
    A1-=R2.H*R3.H,R6.L=(A0-=R2.L*R3.H)|| R0=[I0++] || R7=[I2++] ;
                                // acc1 -= A2*Z(-1); Z0 = (acc0 -= A1*Z(-1)),
                                // kept in R6.L. R0 = X0,X1 again (they are
                                // X(-2),X(-1) of the next pass); I0 advances.
                                // R7 = b2 (low), b1 (high).
    A0=R7.H*R0.H || R7.L=W[I2] || I2-=4;
                                // acc0 = b1*X1 for the next pass. R7.L = b0,
                                // read through I2 as it was before the
                                // parallel step back to c.
END_B: R6.H=(A1-=R2.L*R6.L) || R2=[I2++] || I3+=4 ;
                                // Z1 = (acc1 -= A1*Z0), kept in R6.H.
                                // R2 = b2 (low), b1 (high). Advance the
                                // output pointer by one word.
    I0 -= 4 || [I3++]=R6;       // I0 back to the last input pair; store the
                                // final output pair
    [I1++]=R6 || R0 = [I0];     // save the final output pair as the last
                                // stage's state; R0 = last two input samples
    [P5] = R0;                  // save them at d as x(-2),x(-1) for next call
    L1=0;                       // leave the circular-buffer lengths cleared
    L2=0;
    L3=0;
    (R7:6,P5:5)=[SP++];         // restore R6, R7 and P5
    RTS;
    NOP;                        // to avoid one stall if LINK or UNLINK happens
                                // to be the next instruction after RTS in
                                // memory
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -