📄 dft.asm
字号:
/* Floating point DFT
dft.asm
Revision history:
12/03/99 Boris Lerner Initial version
04/27/00 Boris Lerner Added ___lib_prog_term as label at the end for
the simulator to print total cycles
TigerSHARC real DFT, Number of points = N must be a multiple of 4
Cycle counts:
              | Formula              Cycles (N = 64) |
--------------+--------------------------------------+
Initialization| 12                   12              |
Kernel code   | (N/4)*(2*N+10)+8     2216            |
Termination   | 1                    1               |

Memory usage:
                  | Bytes
------------------+-------------
Initialization    | 48
Kernel code       | 128
Termination       | 8
------------------+-------------
Constant Data     | 512
Non-constant Data | 768
The example waveform has a sum of 5 sinewaves.
*************************************************************************/
#ifdef __ADSPTS201__
#include <defts201.h>
#endif
#include "cache_macros.h"
#define N 64
/************************************************************************/
.section data1;
.var input[N] = "test64.dat"; // Input Data
.var output_real[N]; // Real Output
/************************************************************************/
.section data2;
.var output_imag[N]; // Imaginary Output
.var sin_cos[2*N] = "sin_cos.dat"; // Sine and Cosine Interleaved
/************************************************************************/
.section program;
.global _main;
/************************************** Start of code *****************************************/
_main:
/* Initialization: optionally enable the TS201 cache, then set up the
   pointers, table strides, loop counter and circular buffers that the
   kernel at main_loop relies on.

   Register roles established here:
     J6      --> output_real (write pointer)
     K6      --> output_imag (write pointer)
     K0, K1  --> sin_cos, each set up as a circular buffer of 2*N words
     J2      --> input, set up as a circular buffer of N words
     K5      =  0    stride for the even-multiple sine/cosine fetches
     K7      = -2    stride for the odd-multiple sine/cosine fetches;
                     main_loop adds 4 before the first use, giving 2
     LC0     = N/4   outer (main_loop) iteration count
     R1:0, R3:2, R9:8 are cleared so the first kernel pass starts from zero.
*/
#ifdef __ADSPTS201__
/* On the TS201 the cache must be enabled at the beginning of the program.
   The procedure is contained in the cache_enable macro, which takes the
   refresh rate as its input parameter:
     - if CCLK=500MHz, refresh_rate=750
     - if CCLK=400MHz, refresh_rate=600
     - if CCLK=300MHz, refresh_rate=450
     - if CCLK=250MHz, refresh_rate=375
   The value used below (750) is the CCLK=500MHz entry.
*/
cache_enable(750);
#endif
k6=k31+output_imag;; // K6 --> Imaginary Output
j6=j31+output_real; k5=k31+0;; // J6 --> Real Output; K5 = 0 (initial even-multiple stride)
LC0=N/4; k7=k31-2;; // Outer loop count; K7 = -2 (main_loop adds 4 before first use)
kB0 = k31 + sin_cos;; // K0 --> Sine/Cosine Table (Circular Buffer): base
kL0 = k31 + 2*N;; // K0 circular buffer length = 2*N (sine and cosine interleaved)
k0 = k31 + sin_cos;; // K0 starts at the beginning of the table
kB1 = k31 + sin_cos;; // K1 --> Sine/Cosine Table (Circular Buffer): base
kL1 = k31 + 2*N;; // K1 circular buffer length = 2*N
k1 = k31 + sin_cos;; // K1 starts at the beginning of the table
jB2 = j31 + input; r1:0=r1:0-r1:0;; // J2 --> Input (Circular Buffer): base; clear R1:0
jL2 = j31 + N; r3:2=r3:2-r3:2;; // J2 circular buffer length = N; clear R3:2
j2 = j31 + input; r9:8=lshift r9:8 by 32;; // J2 starts at the beginning of the input; Zero out R9 register
/********************************* Benchmark kernel code ****************************************/
main_loop: // Main Loop is executed N/4 times (LC0 = N/4 is loaded during initialization)
// Each pass accumulates two sums in parallel -- an even multiple "a" in R6
// and an odd multiple "b" in R7 -- and ends with one store to each output buffer.
r5:4=r5:4-r5:4; r7:6=r1:0*r1:0;; // Clear Registers to Zero: R5:4 by self-subtraction, R7:6 as a product of R1:0
// NOTE(review): R7:6 is only cleared if R1:0 holds zero on entry -- confirm for re-entry from the bottom of the loop.
LC1 = N/2; k7=k7+4;; // Set Loop Counter, Update Modify Value for Odd-Multiple Fetches
dft_loop: // DFT Loop is executed N/2 times
// for outer loop a=0,2,4,6,8,....., b=1,3,5,7,..
// for inner loop k=-1,1,3,5,......, n=-1,1,3,5,7,...N/2
//
// Each of the four instruction lines below issues a multiply, an accumulate of
// the previous product, and the fetch of the next table entry. Lines that
// fetch through K0 with stride K5 are the even-multiple fetches of the
// sin_cos table; lines that fetch through K1 with stride K7 are the
// odd-multiple fetches.
fr4=r1*r5; fr6=r4+r6; r1:0=cb l[j2+=0]; r5=cb l[k0+=k5];; // xr4 = f(n)*cos(ak), yr4 = f(n)*-sin(ak);
// xr6 = xr6 + real, yr6 = yr6 + imag;
// load the next input pair into R1:0 (J2 not advanced);
// fetch sin(a(k+1)) and cos(a(k+1))   [even-multiple fetch]
fr8=r3*r9; fr7=r8+r7; r3:2=cb l[j2+=2]; r9=cb l[k1+=k7];; // xr8 = f(n)*cos(bk), yr8 = f(n)*-sin(bk);
// xr7 = xr7 + real, yr7 = yr7 + imag;
// load the same input pair into R3:2 and advance J2 by 2;
// fetch sin(b(k+1)) and cos(b(k+1))   [odd-multiple fetch]
fr4=r0*r5; fr6=r4+r6; r5=cb l[k0+=k5];; // xr4 = f(n+1)*cos(a(k+1)), yr4 = f(n+1)*-sin(a(k+1));
// xr6 = xr6 + real, yr6 = yr6 + imag;
// fetch sin(a(k+2)) and cos(a(k+2))   [even-multiple fetch]
.align_code 4; // Align the loop-back instruction line
if NLC1E, jump dft_loop; fr8=r2*r9; fr7=r8+r7; r9=cb l[k1+=k7];; // xr8 = f(n+1)*cos(b(k+1)), yr8 = f(n+1)*-sin(b(k+1));
// xr7 = xr7 + real, yr7 = yr7 + imag;
// fetch sin(b(k+2)) and cos(b(k+2))   [odd-multiple fetch];
// branch back to dft_loop until LC1 has counted down
// Loop exit: finish the multiplies and accumulates still outstanding.
fr4=r1*r5; fr6=r4+r6;; // Final Multiply and Accumulate
fr8=r3*r9; fr7=r8+r7; k5=k5+4;; // Final Multiply and Accumulate; Update Modify Value for Even-Multiple Fetches
fr6=r4+r6; r1:0=lshift r1:0 by 32;; // Final Accumulate; Zero out R1 register
fr7=r8+r7; r9:8=lshift r9:8 by 32;; // Final Accumulate; Zero out R9 register
// NOTE(review): the two "zero out" comments above are the original author's; how a shift by 32 acts on a register pair is not visible here -- confirm against the shifter documentation.
.align_code 4; // Align the loop-back instruction line
if NLC0E, jump main_loop; l[j6+=2]=xr7:6; l[k6+=2]=yr7:6; r3:2=r3:2-r3:2;; // Store final result in Real(J6) and Imaginary(K6) buffers and update pointers; clear R3:2; repeat until LC0 has counted down
// So the final equation will be:
//
//                            N-1
//   [real(k)]+j*[imag(k)] =  SUM  input(n)[C + j*(-S)];   k=0 to N-1
//                            n=0
//
// where: C=cos(2*pi*k*n/N), S=sin(2*pi*k*n/N), j=sqrt(-1)
//
// NOTE(review): main_loop runs N/4 times and advances J6 and K6 by 2 per pass,
// which suggests N/2 output bins are written rather than N -- confirm the
// stated range of k.
_main.end: // End marker for _main
/******************************************* Done ***********************************************/
// Per the revision history, this label was added at the end so the simulator
// can print the total cycle count.
___lib_prog_term:
jump ___lib_prog_term (NP);; // Done: jump to self, so execution stays here.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -