📄 lms_flp32.asm
字号:
/*************************************************************************
-this program implements a floating point lms filter
-it computes one output at a time
-it can be compiled and run on TS101 and TS201.
-TS101 has only 2 memory blocks (section 1 and 2) where data buffers may be placed.
-TS201 has 5 memory blocks (section 1, 2, 3, 4, 5) where data buffers may be placed.
-In this program only 2 memory blocks have been used to maintain compatibility between
TS101 and TS201
-2 tcl files are provided, one for TS101 and one for TS201. Both of them build
the corresponding project and save the output buffer into a file output.dat
-the .align_code 4 directive has been introduced throughout the main part of the program
to optimize the cycle count for TS201 (on this processor the directives may even be discarded
if the cycle count is not of interest). For TS101, the assembly option -align-branch-lines
placed in the project properties tab has the same effect (on TS101 this alignment is a must for
IF instructions).
-the number of cycles on TS201 is T+18=34 cycles for each output
-the filter equations are:
y(n)=sum hi(n)*x(n-i), sum is for i=0 to i=T-1 and n=0,1,2,3,...
y(n)=h0(n)*x(n)+h1(n)*x(n-1)+h2(n)*x(n-2)+...+h(T-1)(n)*x(n-(T-1))
e(n)=d(n)-y(n)
hi(n+1)=hi(n)+epsilon*e(n)*x(n-i)
h0(n+1)=h0(n)+epsilon*e(n)*x(n)
h1(n+1)=h1(n)+epsilon*e(n)*x(n-1)
...
-epsilon=0.3
-In order to reduce the number of cycles, the algorithm is executed in this order:
1)ee=epsilon*e(n-1) already computed
2)update the coefficients
hi(n)=hi(n-1)+epsilon*e(n-1)*x(n-i-1), i=0,1,..,15
h0(n)=h0(n-1)+ee*x(n-1)
h1(n)=h1(n-1)+ee*x(n-2)
...
h15(n)=h15(n-1)+ee*x(n-16)
3)introduce x(n) into the delay line
4)compute y(n)=h0(n)*x(n)+h1(n)*x(n-1)+h2(n)*x(n-2)+...+h(T-1)(n)*x(n-(T-1))
5)compute e(n)=d(n)-y(n)
6)compute ee=epsilon*e(n) which will be used at the next iteration
-the initial state of the delay line is presented in the first column.
-The accesses are done in quads.
x(-16) x(-15) x(-14) x(-13)
x(-12) x(-11) x(-10) x(-9)
x(-8) x(-7) x(-6) x(-5)
x(-4) x(-3) x(-2) x(-1)
x(-15) x(-14) x(-13) x(-12)
x(-11) x(-10) x(-9) x(-8)
x(-7) x(-6) x(-5) x(-4)
x(-3) x(-2) x(-1) x(0)
x(-14) x(-13) x(-12) x(-11)
x(-10) x(-9) x(-8) x(-7)
x(-6) x(-5) x(-4) x(-3)
x(-2) x(-1) x(0) x(1)
x(-13) x(-12) x(-11) x(-10)
x(-9) x(-8) x(-7) x(-6)
x(-5) x(-4) x(-3) x(-2)
x(-1) x(-16) x(-15) x(-14)
-based on these values, the new coefficients hi(n-1) are computed using
the expressions above
-then the quads x(-15) x(-11) x(-7) x(-3) are saved at the beginning of the buffer,
followed by x(-14) x(-10) x(-6) x(-2) and x(-13) x(-9) x(-5) x(-1).
-the last quad is obtained from x(-16) x(-12) x(-8) x(-4) by introducing x(0)
in the place of x(-16) and rotating the quad to the left by one location: the new
quad is x(-12) x(-8) x(-4) x(0)
-Because of this arrangement, the coefficients are arranged as following:
h(15)
h(11)
h(7)
h(3)
h(14)
h(10)
h(6)
h(2)
h(13)
h(9)
h(5)
h(1)
h(12)
h(8)
h(4)
h(0)
************************************************************************/
// Number of input / desired samples processed by the test loop.
#define N 256
// Number of adaptive filter taps (length of delay_line and coeff).
#define TAPS 16
/*************************************************************************/
// data1: input samples, filter output, the stored epsilon*error term and
// the delay line. indata is read with quad-word accesses by the TS201
// cache preload, so it is explicitly quad-aligned instead of relying on
// where the linker places this input section.
.section data1;
.align 4;
.VAR indata[N] = "test1\input.dat";
.VAR output[N];
.var Epsilon_Error;
// delay_line is read and written with quad-word accesses in the filter.
.align 4;
.var delay_line[TAPS];
// data2: coefficients, desired signal, error output and cycle counter.
// coeff is read and written with quad-word accesses in the filter, so it
// gets the same explicit alignment as delay_line. LMS_desired follows
// coeff at an offset of TAPS (16) words and therefore stays quad-aligned
// for the quad-word accesses of the TS201 cache preload.
.section data2;
.align 4;
.VAR coeff[TAPS];
.VAR LMS_desired[N] = "test1\desired.dat";
.VAR Error[N];
.var cycle_count; // program cycle counts
#ifdef __ADSPTS201__
#include <defts201.h>
#endif
#include "cache_macros.h"
/* Main Program Section */
.section program;
//-------------POWER UP---------------------------------------------------
powerup:
// One-time initialization executed before the test loop:
//  - TS201 builds only: enable the cache and (optionally) touch the input,
//    desired and coefficient buffers so they are already cached;
//  - all builds: run the ini_linear_addressing macro, write zeros across
//    the delay line and zero the stored Epsilon_Error term.
#ifdef __ADSPTS201__
/*in the case of TS201, at the beginning of the program the
cache must be enabled. The procedure is contained in the
cache_enable macro that uses the refresh rate as input parameter
-if CCLK=500MHz, refresh_rate=750
-if CCLK=400MHz, refresh_rate=600
-if CCLK=300MHz, refresh_rate=450
-if CCLK=250MHz, refresh_rate=375
*/
// 750 is the refresh rate listed above for CCLK=500MHz.
cache_enable(750);
//-------------Optional examples to preload cache-----------------------------
// j0 -> indata, k0 -> LMS_desired; the loop counter is loaded with N/4
// because each pass handles one quad (4 words) of each buffer.
j0 = j31 + indata; LC0 = N/4;;
k0 = k31 + LMS_desired;;
ini_cache:
.align_code 4;
// Read one quad from each buffer without moving the pointers (+=0) ...
xr3:0 = q[j0+=0]; yr3:0 = q[k0+=0];;
.align_code 4;
// ... then write the same quads back and advance both pointers by 4.
if NLC0E, jump ini_cache; q[j0+=4] = xr3:0; q[k0+=4] = yr3:0;;
// Same read / write-back pattern for the coefficient buffer
// (TAPS/4 quads).
j0 = j31 + coeff; LC0 = TAPS/4;;
ini_cache1:
.align_code 4;
xr3:0 = q[j0+=0];;
.align_code 4;
if NLC0E, jump ini_cache1; q[j0+=4] = xr3:0;;
//----------------------------------------------------------------------------
#endif
// Macro not defined in this file (presumably from cache_macros.h - confirm).
ini_linear_addressing;
//write 0 across the delay line
xr1:0 = r1:0-r1:0;; //xr1:0=0
// j0 -> delay_line; every pass of the loop below issues two long-word
// stores (2 words each), so TAPS/4 passes cover the TAPS-word buffer.
j0 = j31 + delay_line; LC0 = TAPS/4;;
ini_delay_line:
.align_code 4;
l[j0+=2] = xr1:0;;
.align_code 4;
if NLC0E, jump ini_delay_line; l[j0+=2] = xr1:0;;
//Epsilon*[des(n)-y(n)] initialized 0
// xr0 still holds the zero produced above.
[j31 + Epsilon_Error] = xr0;;
end_powerup:
//-------------TEST LOOP--------------------------------------------------
main:
// Test harness plus LMS kernel. For each of the N samples it fetches
// x(n) and d(n), runs _main_program (coefficient update, delay-line
// shift, FIR sum, error and epsilon*error), and stores y(n) and e(n).
// Pointer / register roles as set up below:
//   j0 -> indata        j1 -> output
//   j2 -> LMS_desired   j5 -> Error
//   xr31 = current input sample, xr30 = current desired sample
//   j3 / k3 = read pointers into delay_line / coeff
//   j4 / k4 = write pointers into delay_line / coeff (same start address)
//   r29 = epsilon*e(n-1) loaded from Epsilon_Error
//   r6, r7 = running sums of the filter products
j0 = j31 + indata;;
j1 = j31 + output;;
j2 = j31 + LMS_desired;;
// The loop counter is loaded with N: one pass per input sample.
j5 = j31 + Error; LC0 = N;;
testloop:
//the input x(0) is fetched
xr31 = [j0+=1];;
//the desired value is fetched
xr30 = [j2+=1];;
//read cycle counter
// (macro not defined in this file - presumably from cache_macros.h)
ini_cycle_count;
//-------------program--------------------------------------------------
_main_program:
j3 = j31 + delay_line;;
k3 = k31 + coeff;;
// Write pointers start at the same addresses as the read pointers.
j4 = j31 + j3; k4 = k31 + k3;;
//load epsilon*[des(n-1)-y(n-1)] computed previous iteration
r29 = [j31+Epsilon_Error];;
// yr1:0=x(-12) x(-16)
// xr1:0=x(-4) x(-8) from the delay line
// yr3:2=h(11) h(15)
// xr3:2=h(3) h(7)
r1:0 = q[j3+=4]; r3:2 = q[k3+=4];;
// yr9:8=x(-11) x(-15)
// xr9:8=x(-3) x(-7) from the delay line
// yr11:10=h(10) h(14)
// xr11:10=h(2) h(6)
r9:8 = q[j3+=4]; r11:10 = q[k3+=4];;
//begin computing the new coefficients hi(n)=hi(n-1)+ee*x(n-1-i)
// update the delay line
// (the second quad is written back at the start of the buffer)
fr4 = r0 * r29; q[j4+=4] = r9:8;;
// r5 = ee * r1 (second update term of the first quad)
fr5 = r1 * r29;;
// yr2=h15(n)
// xr2=h7(n)
fr4 = r8 * r29; fr2 = r2 + r4;;
// yr13:12=x(-10) x(-14)
// xr13:12=x(-2) x(-6) from the delay line
// yr3=h(11) yr15:14=h(9) h(13)
// xr3=h(3) xr15:14=h(1) h(5)
fr5 = r9 * r29; fr3 = r3 + r5; r13:12 = q[j3+=4]; r15:14 = q[k3+=4];;
//yr6=x(-15)*h15 yr10=h14(n)
//xr6=x(-7)*h7 xr10=h6(n) update the coeff buffer
//begin computing the filter
fr6 = r8 * r2; fr10= r10 + r4; q[k4+=4] = r3:2;;
// yr17:16=x(-9) x(-13)
// xr17:16=x(-1) x(-5) from the delay line
// yr19:18=h(8) h(12)
// xr19:18=h(0) h(4)
fr7 = r9 * r3; fr11= r11 + r5; r17:16 = q[j3+=4]; r19:18 = q[k3+=4];;
// update the coeff buffer
fr4 = r12 * r10; q[k4+=4] = r11:10;;
// update the delay line
fr5 = r13 * r11; q[j4+=4] = r13:12;;
// update the delay line
fr4 = r12 * r29;fr6 = r6 + r4; q[j4+=4] = r17:16;;
// begin rotating x(-12) x(-8) x(-4) x(0)
fr5 = r13 * r29;fr7 = r7 + r5; yr0 = yr1;;
// yr14=h13
// xr14=h5
fr4 = r16 * r29;fr14= r14 + r4; yr1 = xr0;;
// yr15=h9
// xr15=h1
fr5 = r17 * r29;fr15= r15 + r5; xr0 = xr1;;
// update the coeff buffer
fr4 = r16 * r14;fr18= r18 + r4; q[k4+=4] = r15:14;;
// xr1 = xr31 puts the new input sample into the rotated quad, which
// completes the rotation started above.
fr5 = r17 * r15;fr19= r19 + r5; xr1 = xr31;;
// update the coeff buffer
fr4 = r0 * r18; fr6 = r6 + r4; q[k4+=4] = r19:18;;
// update the delay line
fr5 = r1 * r19; fr7 = r7 + r5; q[j4+=4] = r1:0;;
// Add the remaining products into the two running sums; r4 / r5
// presumably still hold the r0*r18 / r1*r19 products issued above.
fr6 = r6 + r4;;
fr7 = r7 + r5;;
// Combine the two running sums; load epsilon = 0.3 into xr4.
fr6 = r6 + r7; xr4 = 0.3;;
// Copy the Y-side sum to the X side so both halves can be added.
xr7 = yr6;;
xfr6 = r6 + r7;;//y(n)
xfr7 = r30 - r6;;//des(n)-y(n)
// xr8 = epsilon * [des(n)-y(n)]
xfr8 = r4 * r7;;
// Saved for the coefficient update of the next iteration.
[j31+Epsilon_Error] = xr8;;
_main_program.end:
//read cycle counter and compute the program cycle count
// (macro not defined in this file - presumably from cache_macros.h)
comp_cycle_count;
//-------------TEST LOOP--------------------------------------------------
// Store y(n) through j1 (output buffer).
[j1+=1] = xr6;;
.align_code 4;
// Store des(n)-y(n) through j5 (Error buffer) and loop on LC0.
if NLC0E, jump testloop; [j5+=1] = xr7;;
main.end:
// End-of-program label followed by a line of nops.
___lib_prog_term:
nop;nop;nop;nop;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -