⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dft.asm

📁 基于TS201 32-bit floating point DFT routine
💻 ASM
字号:
/*       Floating point DFT	

 	dft.asm

  Revision history:

	12/03/99  Boris Lerner			Initial version
	04/27/00  Boris Lerner          Added ___lib_prog_term as label at the end for 
	                                the simulator to print total cycles


TigerSHARC real DFT, Number of points = N must be a multiple of 4

  Cycle counts  |  Formula             Cycles (N = 64)
  --------------+--------------------------------------
  Initialization|    12                  12
  Kernel code   |  (N/4)*(2*N+10)+8    2216
  Termination   |     1                   1


  Memory usage      |       Bytes
  ------------------+-------------
  Initialization    |         48
  Kernel code       |        128
  Termination       |          8
  ------------------+-------------
  Constant Data     |        512
  Non-constant Data |        768 

  The example input waveform is the sum of 5 sine waves.
  
*************************************************************************/

#ifdef __ADSPTS201__
    #include <defts201.h>
#endif

#include "cache_macros.h"


// Number of DFT points. The file header states N must be a multiple of 4;
// the code below uses N/4 and N/2 as loop counts and 2*N as the table length.
// NOTE(review): the .dat initialisers presumably have to match N -- confirm
// before changing this value.
#define	         N                      64

/************************************************************************/
// data1 holds the input samples and the real part of the result.
.section data1;

.var input[N] = "test64.dat";           // Input Data (initialised from test64.dat)
.var output_real[N];                    // Real Output 

/************************************************************************/
// data2 holds the imaginary part of the result and the twiddle table.
// NOTE(review): input (read through J2) and sin_cos (read through K0/K1) are
// fetched in the same instruction line in the kernel; the split into two
// sections presumably places them in different memory blocks -- confirm
// against the linker description file.
.section data2;

.var output_imag[N];                    // Imaginary Output 
.var sin_cos[2*N] = "sin_cos.dat";      // Sine and Cosine Interleaved (2 words per table entry)

/************************************************************************/
.section program;
.global _main;

/************************************** Start of code *****************************************/
/*
 * _main -- program entry point; one-time setup for the DFT kernel.
 *
 * Falls through into main_loop. State established here:
 *
 *   J6            --> output_real (store pointer for real results)
 *   K6            --> output_imag (store pointer for imaginary results)
 *   K5  =  0      modify value used with K0; the kernel adds 4 after every
 *                 main_loop pass (even-multiple fetches)
 *   K7  = -2      modify value used with K1; the kernel adds 4 at the top of
 *                 every main_loop pass, so the first pass uses 2
 *                 (odd-multiple fetches)
 *   K0/KB0/KL0    circular buffer over sin_cos, length 2*N
 *   K1/KB1/KL1    circular buffer over sin_cos, length 2*N
 *   J2/JB2/JL2    circular buffer over input, length N
 *   LC0 = N/4     main_loop iteration count
 *   R1:0, R3:2    cleared (register pair minus itself)
 *   R9:8          cleared by the shift (original note: zero out R9)
 *
 * J31/K31 are used throughout as the zero operand, so "k31 + X" simply
 * loads X into the destination register.
 */
_main:

#ifdef __ADSPTS201__
/* In the case of TS201, the cache must be enabled at the beginning of the
   program. The procedure is contained in the cache_enable macro, which takes
   the refresh rate as its input parameter:
      -if CCLK=500MHz, refresh_rate=750
      -if CCLK=400MHz, refresh_rate=600
      -if CCLK=300MHz, refresh_rate=450
      -if CCLK=250MHz, refresh_rate=375
*/
  cache_enable(750);

#endif

        k6=k31+output_imag;;            // K6 --> Imaginary Output
        j6=j31+output_real; k5=k31+0;;  // J6 --> Real Output; K5 = 0 (initial modify value for K0)
        LC0=N/4; k7=k31-2;;             // Outer loop count; K7 = -2 (becomes 2 on the first main_loop pass)

        kB0 = k31 + sin_cos;;           // K0 --> Sine/Cosine Table (Circular Buffer)
        kL0 = k31 + 2*N;;
        k0 = k31 + sin_cos;;

        kB1 = k31 + sin_cos;;           // K1 --> Sine/Cosine Table (Circular Buffer)
        kL1 = k31 + 2*N;;
        k1 = k31 + sin_cos;;

        jB2 = j31 + input; r1:0=r1:0-r1:0;;         // J2 --> Input (Circular Buffer); clear R1:0
        jL2 = j31 + N; r3:2=r3:2-r3:2;;             // Buffer length N; clear R3:2
        j2 = j31 + input; r9:8=lshift r9:8 by 32;;  // Zero out R9 register

	
/********************************* Benchmark kernel code ****************************************/
// ---------------------------------------------------------------------------------------------
// main_loop / dft_loop -- DFT kernel.
//
// Each main_loop pass accumulates two results, "a" (table walked through K0 with
// modify K5) and "b" (table walked through K1 with modify K7), and stores them with
// one long-word write each to the real (J6) and imaginary (K6) buffers.
// main_loop runs N/4 times (LC0); dft_loop runs N/2 times per pass (LC1).
//
// On entry to every pass R1:0, R3:2 and R9:8 are zero (cleared by the set-up code
// before the first pass and again by the drain code at the bottom of each pass).
//
// NOTE(review): the multiply, accumulate and fetch on each line appear to be
// software-pipelined (the "Final Multiply and Accumulate" lines after dft_loop drain
// the last products). Do not reorder instruction lines without re-checking this.
// ---------------------------------------------------------------------------------------------
main_loop:    // Main Loop is executed N/4 times

                r5:4=r5:4-r5:4; r7:6=r1:0*r1:0;;                // Clear Registers to Zero (R7:6 is cleared via the multiplier: R1:0 is zero here)					
                LC1 = N/2; k7=k7+4;;		                // Set Loop Counter, Update Modify Value for Odd-Multiple Fetches
		
dft_loop:     // DFT Loop is executed N/2 times
                                                                                                     // for outer loop a=0,2,4,6,8,.....,     b=1,3,5,7,..
                                                                                                     // for inner loop  k=-1,1,3,5,......,     n=-1,1,3,5,7,...N/2
            
                        fr4=r1*r5; fr6=r4+r6; r1:0=cb l[j2+=0]; r5=cb l[k0+=k5];;               // xr4 = f(n)*cos(ak), yr4 = f(n)*-sin(ak);
                                                                                                         // xr6 = xr6 + real, yr6 = yr6 + imag;
                                                                                                                  // fetch sin(a(k+1)) and cos(a(k+1));; <-----------------------|
                                                                                                //                                                                               |
                        fr8=r3*r9; fr7=r8+r7; r3:2=cb l[j2+=2]; r9=cb l[k1+=k7];;               // xr8 = f(n)*cos(bk), yr8 = f(n)*-sin(bk);                                      |
                                                                                                         // xr7 = xr7 + real, yr7 = yr7 + imag;                                  |
                                                                                                                  // fetch sin(b(k+1)) and cos(b(k+1));; <------------|          |---Even-Multiple Fetches
                                                                                                //                                                                    |          |            Of Sin_Cos Table
                        fr4=r0*r5; fr6=r4+r6; r5=cb l[k0+=k5];;		 	                // xr4 = f(n+1)*cos(a(k+1)), yr4 = f(n+1)*-sin(a(k+1));               |          |
                                                                                                         // xr6 = xr6 + real, yr6 = yr6 + imag;                       |          |
                                                                                                                  // fetch sin(a(k+2)) and cos(a(k+2));; <------------|----------|      
.align_code 4;                                                                                                //                                                                    |
                        if NLC1E, jump dft_loop; fr8=r2*r9; fr7=r8+r7; r9=cb l[k1+=k7];;	// xr8 = f(n+1)*cos(b(k+1)), yr8 = f(n+1)*-sin(b(k+1));               |
                                                                                                         // xr7 = xr7 + real, yr7 = yr7 + imag;                       |---Odd-Multiple Fetches
                                                                                                                  // fetch sin(b(k+2)) and cos(b(k+2));; <------------|   Of Sin_Cos Table
			
                // Drain: finish the products/sums still in flight, then re-zero the registers the next pass expects to be zero.
                fr4=r1*r5; fr6=r4+r6;;                           // Final Multiply and Accumulate;; 
                fr8=r3*r9; fr7=r8+r7; k5=k5+4;;                  // Final Multiply and Accumulate; Update Modify Value for Even-Multiple Fetches;; 
                fr6=r4+r6; r1:0=lshift r1:0 by 32;;		 // Final Accumulate; Zero out R1 register;;			
                fr7=r8+r7; r9:8=lshift r9:8 by 32;;	         // Final Accumulate; Zero out R9 register;
.align_code 4;                                                                                                //                                                                    |
                if NLC0E, jump main_loop; l[j6+=2]=xr7:6; l[k6+=2]=yr7:6; r3:2=r3:2-r3:2;;	// Store final result in Real(J6) and Imaginary(K6) buffers and update pointers; clear R3:2 for the next pass
                                                                                                // So the final equation will be:
                                                                                                //                         N-1
                                                                                                // [real(k)]+j*[imag(k)] = SUM input(n)[C + j*(-S)]; k=0 to N-1
                                                                                                //                         n=0
                                                                                                // where: C=cos(2*pi*k*n/N), S=sin(2*pi*k*n/N), j=sqrt(-1)                                                                              //
                                                                                                //
                                                                                                
_main.end:                                                                                                // End-of-routine marker for _main
                                                                                                
/******************************************* Done ***********************************************/
// Termination: the program parks here by jumping to itself forever.
// Per the revision history, the ___lib_prog_term label was added so the
// simulator can print the total cycle count.
// NOTE(review): (NP) is presumably the "not predicted" branch option -- confirm
// in the TigerSHARC programming reference.
___lib_prog_term:
        jump ___lib_prog_term (NP);;                            // Done.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -