// iir.asm
// (The web source-viewer label "Font size:" that followed this caption is not part of the program.)
/********************************************************************************************************
Copyright(c) 2000 Analog Devices/Intel
Developed by JD(FRIO) Software Application Team, IPDC, Bangalore, India
*********************************************************************************************************
File name : iir.asm
Module name : iir
Label name : __iir
Description : This program implements a cascaded biquad, DF1 form, IIR filter for 1.15 format data and
coefficients. The coefficient buffer that is passed should hold five coefficients per stage, in the
order b2,b1,b0,a2,a1 for the first stage, then B2,B1,B0,A2,A1 for the second stage, and so on.
The value of a0 is unity. The first two elements of the delay line are x(-2) and x(-1).
The rest of the delay line buffer must be ordered as y(n-2), y(n-1) for each stage.
This program provides valid output only if the number of biquad stages is greater than one
(that is, only for filter orders greater than 2). The equation implemented for each stage is:
y(n) = b0 * x(n) + b1 * x(n-1) + b2 * x(n-2) - a1 * y(n-1) - a2 * y(n-2)
Note : The b and a coefficients generated using MATLAB can be used as is. However, if the
coefficient generation software already negates the 'a' coefficients in its output, they
must be negated again before use, because this routine subtracts the 'a' terms.
Registers used :
R0, R1, R2, R3, R6, R7
I0 -> Address of the input buffer x[]
I1 -> Address of delay line buffer d[]
I2 -> Address of coefficient buffer c[]
I3 -> Address of Output y[]
P0 -> (No: of input samples+1) / 2
P1 -> Number of stages
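P2 -> Address of coefficient buffer during setup, then delay line length in bytes (4 * number of stages)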
P5 -> Address of delay line buffer
[SP+12] -> Address of structure s
Function Prototype :
void iir(const fract16 x[],fract16 y[],int n,iir_state_fr16 *s);
x[] - input array
y[] - output array
n - number of input samples(even)
s - Structure of type iir_state_fr16:
typedef struct iir_state_fr16
{
fract16 *c; // coefficients
fract16 *d; // start of delay line
int k; // no. of bi-quad stages
} iir_state_fr16;
Computation Time:
Total execution time for Number of Samples= Ni & number of biquad stages = B :
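Kernel Cycle Count : (Ni/2){3 + ((B-1)*6) + 4}
: 3*Ni*B + Ni/2
(The first term originally read "4 + ((B-1)*6) + 4", which expands to 3*Ni*B + Ni and contradicts
both the simplified form above and the 456-cycle total below; the leading term is taken to be 3.)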
Initialization : 33 + 7 = 40
For Ni=64 & B=2
Total execution time = 456 cycles
IIR filter code size : 222 bytes
IIR filter core size : 108 bytes
*******************************************************************/
/* Input buffer (in), output buffer (out), delay line buffer (delay) and filter coefficient
buffer (c) are all aligned to a 4-byte (word) boundary.
*/
.section data1;

// Statically allocated, globally visible buffers.
// The declaration order below fixes their relative placement in the section.

// Delay line buffer: 20 elements, placed on a 4096 boundary.
.global _delay;
.align 4096;
.var _delay[20];

// Coefficient buffer: 20 elements, placed on a 4096 boundary.
.global _c;
.align 4096;
.var _c[20];

// Input buffer: 70 elements, placed on a 4096 boundary.
.global _in;
.align 4096;
.var _in[70];

// Output buffer: 70 elements, placed directly after _in (no alignment directive of its own).
.global _out;
.var _out[70];
.section program;
.global __iir;
.align 8;
/*
 * __iir -- cascaded biquad (Direct Form I) IIR filter on 1.15 data and coefficients.
 *
 * C prototype (from the file header):
 *     void iir(const fract16 x[], fract16 y[], int n, iir_state_fr16 *s);
 *
 * On entry:
 *     R0      = x, address of the input samples
 *     R1      = y, address of the output samples
 *     R2      = n, number of input samples
 *     [SP+12] = s; three consecutive words are read from it:
 *               coefficient pointer, delay line pointer, number of stages
 *
 * Processing:
 *     Two samples are produced per outer-loop pass; the outer loop runs (n+1)/2 times.
 *     The inner loop runs (stages - 1) times, so the stage count must be greater than one.
 *     Stage 1 of each sample pair is interleaved with the tail of the previous pair, which
 *     is why the output of a pair is stored to y[] one pass later (software pipelining).
 *
 * Circular buffers (Ireg / Breg / Lreg):
 *     I1: per-stage output history, base = delay + 4, length = 4 * stages bytes
 *     I2: coefficients,             base = c,         length = 10 * stages bytes
 *     I3: output,                   base = y,         length = 4 * ((n+1)/2) bytes
 *     I0: input, linear (L0 = 0)
 *
 * Preserved : R6, R7, P5 (pushed and popped here).
 * Modified  : R0-R3, P0-P2, A0, A1, I0-I3, B1-B3, L0-L3 and both hardware loop
 *             register sets.
 *
 * Changes relative to the original code:
 *   1. L3 is now loaded before "I3 -= 4". Previously the decrement ran while L3 was still
 *      at its entry value; with L3 == 0 there is no circular wrap, so the first (dummy)
 *      store of the outer loop landed 4 bytes in front of y[] instead of in the last slot
 *      of y[] as the original comment intended.
 *   2. "L1 = 0" is now executed before the first "[I1++]" access, so that access is linear
 *      whatever L1 held on entry.
 *   3. L1, L2 and L3 are cleared before returning. The VisualDSP++ C run-time model
 *      expects the length registers to be zero across calls; leaving them set would make
 *      later I-register accesses in the caller wrap unexpectedly.
 *   Change 3 adds three instructions, so the initialization cycle count and the code size
 *   quoted in the file header no longer match exactly.
 */
__iir:
    // ---------------- Argument fetch and register save ----------------
    P0=[SP+12];                 // P0 = s. Read before the push below moves SP.
    R2+=1;                      // R2 = n + 1
    [--SP]=(R7:6,P5:5);         // Save R6, R7 and P5
    P2=[P0++];                  // P2 = address of coefficients
    P5=[P0++];                  // P5 = address of delay line
    P1=[P0++];                  // P1 = number of stages
    R2 >>=1;                    // R2 = (n + 1) / 2 = number of sample pairs

    // ---------------- Index / base register setup ----------------
    I0=R0;                      // I0 -> input samples
    I2=P2;                      // I2 -> coefficients
    I3=R1;                      // I3 -> output samples
    I1=P5;                      // I1 -> start of delay line, i.e. x(-2), x(-1)
    P5 += 4;                    // P5 -> first per-stage output history slot
    R0=P1 ;                     // R0 = number of stages (multiplier operand below)
    R3=10 (Z);                  // 10 bytes of coefficients per stage (5 coefficients * 2 bytes)
    B3=R1;                      // Output buffer is circular, base = y
    B2=P2;                      // Coefficient buffer is circular, base = c
    B1=P5;                      // Output history is circular, base = delay + 4
    P0=R2;                      // P0 = outer loop count = (n + 1) / 2
    P2=P1<<2;                   // P2 = 4 * stages = output history length in bytes
    R2<<=2;                     // R2 = 4 * (n + 1) / 2 = output buffer length in bytes

    // Length registers that affect the accesses in the next instruction are set first.
    L3=R2;                      // Output buffer length; must precede "I3 -= 4" so that it wraps
    L1 = 0;                     // Keep the x(-2), x(-1) fetch through I1 linear

    R3.L=R3.L*R0.L(IS) || I3-=4 || R0 = [I1++] ;
                                // R3.L = 10 * stages.
                                // I3 wraps from y to the last sample pair of y[]; the first
                                // outer-loop pass does a dummy store there, which the final
                                // store after the loop overwrites.
                                // R0 = x(-2) (low half), x(-1) (high half); I1 -> delay + 4.

    L0=0;                       // Input buffer is accessed linearly
    L2=R3;                      // Coefficient buffer length = 10 * stages bytes
    L1=P2;                      // Output history length = 4 * stages bytes

    I1-=4 || R7=[I2];           // I1 wraps to the last stage's history slot (the first loop
                                // pass performs a dummy write there).
                                // R7 = b2 (low), b1 (high); I2 is not advanced.
    R3 = [P5--] || R2=[I2++];   // Load into R3 is a throwaway; the post-decrement moves P5
                                // back to the start of the delay line.
                                // R2 = b2 (low), b1 (high); I2 -> b0.
    R3 = B[P1--] || R6 = [I1];  // Load into R3 is a throwaway; the post-decrement leaves
                                // P1 = stages - 1 (inner loop count).
                                // NOTE(review): this byte load uses the stage count itself
                                // as an address -- confirm that low address is readable.
                                // R6 = stored output pair of the last stage, rewritten
                                // unchanged by the first loop pass.

    // ---------------- Outer loop: one pass per pair of samples ----------------
    LSETUP (START_B,END_B) LC0=P0;
    A0=R7.H*R0.H || R7.L=W[I2] ;
                                // Loop prologue (executed once): A0 = b1 * x(-1).
                                // R7.L = b0, so R7 = b0 (low), b1 (high).
START_B:
    A1=R2.L*R0.H, A0+=R2.L*R0.L || R0=[I0] || [I1++]=R6 ;
                                // A1 = b2 * x(-1), A0 += b2 * x(-2).
                                // R0 = x0 (low), x1 (high); I0 is advanced later.
                                // Store the previous pair's final output into the last
                                // stage's history slot; I1 wraps to the first stage's slot.
    A1+=R7.H*R0.L, A0+=R7.L*R0.L || R0.L=W[I1++] || R2=[I2++] ;
                                // A1 += b1 * x0, A0 += b0 * x0.
                                // R0.L = y(-2) of this stage.
                                // R2 = b0 (low), a2 (high); I2 -> a1.

    // -------- Inner loop: finish one stage, start the next (stages - 1 passes) --------
    LSETUP (BSTART,BEND) LC1=P1;
BSTART:
    A1+=R2.L*R0.H, A0-=R2.H*R0.L || R0.H=W[I1--] || R2.L=W[I2++] ;
                                // A1 += b0 * x1, A0 -= a2 * y(-2).
                                // R0.H = y(-1); I1 steps back to this stage's y(-2).
                                // R2.L = a1; I2 -> next stage's B2.
    A1-=R2.H*R0.H, R0.L=(A0-=R2.L*R0.H)|| R1=[I1] || R3.H=W[I2++] ;
                                // A1 -= a2 * y(-1); y0 = (A0 -= a1 * y(-1)) goes to R0.L.
                                // R1 = y(-2) (low), y(-1) (high).
                                // R3.H = B2 of the next stage.
    A0=R3.H*R1.L || [I3]=R6 || R2.H=W[I2++] ;
                                // A0 = B2 * y(-2): start of the next stage, first sample.
                                // Store the previous pair's final output to y[]; I3 does not
                                // move here, so every inner pass rewrites the same location.
                                // R2.H = B1.
    R0.H=(A1-=R2.L*R0.L),A0+=R2.H*R0.H || R2.L=W[I2++] ;
                                // y1 = (A1 -= a1 * y0) goes to R0.H;
                                // A0 += B1 * y(-1), the multiplier reading R0.H before it
                                // is overwritten by y1.
                                // R2.L = B0.
    A1=R3.H*R1.H || [I1++]=R0 ;
                                // A1 = B2 * y(-1): start of the next stage, second sample.
                                // Replace y(-2), y(-1) in the history with y0, y1;
                                // I1 -> next stage's slot.
BEND:
    A1+=R2.H*R0.L, A0+=R2.L*R0.L || R0.L=W[I1++] || R2.H=W[I2++] ;
                                // A1 += B1 * y0, A0 += B0 * y0.
                                // R0.L = z(-2), the next stage's own history.
                                // R2.H = A2. Registers now mirror the state at BSTART.

    // -------- Finish the last stage and pre-compute the next pair's first terms --------
    A1+=R2.L*R0.H, A0-=R2.H*R0.L || R3.H=W[I1--] || R2.L=W[I2++] ;
                                // A1 += B0 * y1, A0 -= A2 * z(-2).
                                // R3.H = z(-1); I1 steps back to the last stage's slot.
                                // R2.L = A1 coefficient; I2 wraps to the start of c[].
    A1-=R2.H*R3.H, R6.L=(A0-=R2.L*R3.H)|| R0=[I0++] || R7=[I2++] ;
                                // A1 -= A2 * z(-1); z0 = (A0 -= A1coef * z(-1)) goes to R6.L.
                                // R0 = current x0, x1 (they are x(-2), x(-1) for the next
                                // pair); I0 -> next input pair.
                                // R7 = b2 (low), b1 (high); I2 -> b0.
    A0=R7.H*R0.H || R7.L=W[I2] || I2-=4;
                                // A0 = b1 * x1, i.e. b1 * x(-1) for the next pair.
                                // R7.L = b0; I2 returns to the start of c[].
END_B:
    R6.H=(A1-=R2.L*R6.L) || R2=[I2++] || I3+=4 ;
                                // z1 = (A1 -= A1coef * z0) goes to R6.H; R6 = final pair.
                                // R2 = b2 (low), b1 (high); I2 -> b0.
                                // I3 -> location for this pair's output.

    // ---------------- Epilogue: flush the pipelined results ----------------
    I0 -= 4 || [I3++]=R6;       // I0 -> last input pair consumed.
                                // Store the final output pair to y[].
    [I1++]=R6 || R0 = [I0];     // Save the final output pair as the last stage's history.
                                // R0 = last two input samples.
    [P5] = R0;                  // Save them at the start of the delay line as x(-2), x(-1)

    L1 = 0;                     // Return with circular addressing disabled, as the C
    L2 = 0;                     // run-time model expects (L0 has been 0 since setup).
    L3 = 0;

    (R7:6,P5:5)=[SP++];         // Restore R6, R7 and P5
    RTS;
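// ---------------------------------------------------------------------------
// The text below is residue from the web source viewer this file was copied
// from. It is not part of the program and is kept only as a comment.
//   Keyboard shortcuts
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Switch theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -
// ---------------------------------------------------------------------------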