📄 example 3-18.asm

📁 《基于TI DSP的通用算法实现》程序代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
12 下一页

;Example 3 - 18. Integer Scaling BFP Based DIT Radix-2 FFT Implementation ASM Listing for TMS320C55x

; THIS PROGRAM IS PROVIDED "AS IS". TI MAKES NO WARRANTIES OR
; REPRESENTATIONS, EITHER EXPRESS, IMPLIED OR STATUTORY, 
; INCLUDING ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS 
; FOR A PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR 
; COMPLETENESS OF RESPONSES, RESULTS AND LACK OF NEGLIGENCE. 
; TI DISCLAIMS ANY WARRANTY OF TITLE, QUIET ENJOYMENT, QUIET 
; POSSESSION, AND NON-INFRINGEMENT OF ANY THIRD PARTY 
; INTELLECTUAL PROPERTY RIGHTS WITH REGARD TO THE PROGRAM OR 
; YOUR USE OF THE PROGRAM.
;
; IN NO EVENT SHALL TI BE LIABLE FOR ANY SPECIAL, INCIDENTAL, 
; CONSEQUENTIAL OR INDIRECT DAMAGES, HOWEVER CAUSED, ON ANY 
; THEORY OF LIABILITY AND WHETHER OR NOT TI HAS BEEN ADVISED 
; OF THE POSSIBILITY OF SUCH DAMAGES, ARISING IN ANY WAY OUT 
; OF THIS AGREEMENT, THE PROGRAM, OR YOUR USE OF THE PROGRAM. 
; EXCLUDED DAMAGES INCLUDE, BUT ARE NOT LIMITED TO, COST OF 
; REMOVAL OR REINSTALLATION, COMPUTER TIME, LABOR COSTS, LOSS 
; OF GOODWILL, LOSS OF PROFITS, LOSS OF SAVINGS, OR LOSS OF 
; USE OR INTERRUPTION OF BUSINESS. IN NO EVENT WILL TI'S 
; AGGREGATE LIABILITY UNDER THIS AGREEMENT OR ARISING OUT OF 
; YOUR USE OF THE PROGRAM EXCEED FIVE HUNDRED DOLLARS 
; (U.S.$500).
;
; Unless otherwise stated, the Program written and copyrighted 
; by Texas Instruments is distributed as "freeware".  You may, 
; only under TI's copyright in the Program, use and modify the 
; Program without any charge or restriction.  You may 
; distribute to third parties, provided that you transfer a 
; copy of this license to the third party and the third party 
; agrees to these terms by its first use of the Program. You 
; must reproduce the copyright notice and any other legend of 
; ownership on each copy or partial copy, of the Program.
;
; You acknowledge and agree that the Program contains 
; copyrighted material, trade secrets and other TI proprietary 
; information and is protected by copyright laws, 
; international copyright treaties, and trade secret laws, as 
; well as other intellectual property laws.  To protect TI's 
; rights in the Program, you agree not to decompile, reverse 
; engineer, disassemble or otherwise translate any object code 
; versions of the Program to a human-readable form.  You agree 
; that in no event will you alter, remove or destroy any 
; copyright notice included in the Program.  TI reserves all 
; rights not specifically granted under this license. Except 
; as specifically provided herein, nothing in this agreement 
; shall be construed as conferring by implication, estoppel, 
; or otherwise, upon you, any license or other right under any 
; TI patents, copyrights or trade secrets.
;
; You may not use the Program in non-TI devices.


;***********************************************************
; Version 2.20.02                                           
;***********************************************************
; Processor:   C55xx
; Description: Radix-2 DIT complex FFT using normal input data
;    and bit-reversed twiddle table (length N/2, cosine/sine format)
;    All stages are in radix-2.
; Usage:    void cfft_bfp(DATA *xy, ushort nx);
; Copyright Texas instruments Inc, 2000
; History:
; 	- 07/18/2002	Sira fixed the bug in the kernel.
;   - 06/19/2003    David Elam - modified to perform BFP FFT 
;					and support 8-point FFT. 
;****************************************************************

;-----------------------------------------------------------------------
; Arguments passed to _cfft_bfp  (void cfft_bfp(DATA *xy, ushort nx))
;
; ar0       ->    fftdata pointer (xy)
; t0        ->    fft size (nx)
;
;-------------------------------------------------------------------------



;//-----------------------------------------------------------------------------
;// Array declarations
;//-----------------------------------------------------------------------------


	.ref	twiddle  ; twiddle table is defined in another module (external reference)
	         


        .def _cfft_bfp     ; export the entry point so other modules can call it
        .cpl_on            ; assemble assuming CPL = 1 (needed for the *sp(offset) operands used below)
        .arms_off     ; assemble assuming ARMS mode is OFF (the routine clears ARMS in ST2_55 at run time)
        .mmregs            ; define symbolic names for the memory-mapped registers (ST0_55, ST1_55, ...)
        .noremark    5579, 5573, 5549   ; suppress the assembler remarks with these ID numbers
  
; Stack frame
; -----------
; Sizes (in words) of the pieces that make up the stack frame.
; NOTE(review): REG_SAVE_SZ is 0 although the routine pushes status and
; save-on-entry registers at entry; those pushes are not counted here, so
; PARAM_OFFSET does not include them. PARAM_OFFSET is not referenced in the
; visible part of the file - confirm before relying on it.
RET_ADDR_SZ       .set 1            ;return address
REG_SAVE_SZ       .set 0            ;save-on-entry registers saved
FRAME_SZ          .set 4            ;local variables (the four slots listed below)
ARG_BLK_SZ        .set 0            ;argument block

PARAM_OFFSET      .set ARG_BLK_SZ + FRAME_SZ + REG_SAVE_SZ + RET_ADDR_SZ


; Local variables
; --------------      
; SP-relative word offsets of the locals, used as *sp(name).
	   .asg	   0, ret_exp      ; BFP exponent accumulator (initialised to 0 on entry)
       .asg    1, data_ptr     ; saved copy of the data pointer passed in AR0
       .asg    2, data_sz      ; saved copy of the FFT size passed in T0
       .asg    3, bit_buf      ; bit-growth buffer (initialised to 1 on entry)

;//-----------------------------------------------------------------------------
;// Temp data
;//-----------------------------------------------------------------------------

		.bss tempmem, 2, , 2   ; 2 words in .bss; 4th operand requests alignment (presumably long-word, for dbl() access - confirm)
		.bss tempmem1, 2, , 2  ; second 2-word scratch area, same alignment request

;//-----------------------------------------------------------------------------
;// Register aliases
;//-----------------------------------------------------------------------------
		.asg	T3,groups		; number of butterfly groups
		.asg	T1,index		; address adjustment
		.asg	AR7,bflies		; number of butterflies/group
		.asg	AR4,k			; outer loop counter
		.asg	AR2,a			; top butterfly pointer
		.asg	AR3,b			; bottom butterfly pointer
		.asg	AR5,temp		; temp register	
		

      
        .text           


_cfft_bfp



;
; Save any save-on-entry registers that are used
;----------------------------------------------------------------
;

      PSH	mmap(ST0_55)
      PSH	mmap(ST1_55)
      PSH	mmap(ST2_55)  

      PSH T2
      PSH T3
	PSHBOTH XAR5
	PSHBOTH XAR6
	PSHBOTH XAR7
	
;
; Allocate the local frame and argument block
;----------------------------------------------------------------
	AADD	#-(ARG_BLK_SZ + FRAME_SZ + REG_SAVE_SZ), SP
	

;
; Save entry values for later
;----------------------------------------------------------------

	MOV AR0, *sp(data_ptr) ; put starting address on stack
    MOV T0, *sp(data_sz)   ; put size on stack
    MOV #0, *sp(ret_exp)   ; initialize stack space for BFP exponent
	MOV #1, *sp(bit_buf)   ; put bit growth buffer on stack for first
						   ; two stages - will be 2 for all following stages

;-----------------------------------------------------------------------
; FFT implementation
;
; The FFT is implemented in 5 different steps:
;
;  1) - a radix-2 stage without any multiplications.
;  2) - a radix-2 stage with two groups, only the 2nd group has
;       multiplications with 0x7FFFFFFH and 0x00000000
;  3) - a group of log2(FFT_SIZE)-3 radix-2 stages
;  4) - a radix-2 stage without scaling.
;  5) - an in-place bit-reversal

;-----------------------------------------------------------------------
; Modification of status registers
;-----------------------------------------------------------------------
	  
        BSET	FRCT
        BSET	SATD
        BSET	SXMD          
        BCLR #ARMS, ST2_55          
	  	BCLR #C54CM, ST1_55 
	  	
	

;***********************************************************************
	CALL scale_exp	; find maximum absolute value from previous stage
						; scale by 2^(exponent of max value - 2)
;***********************************************************************
	
; radix-2 stage 1
;
; 
  
		 MOV T0, T1
		 ADD T0, T1
		 ADD T1, AR0
         MOV #2, T1
         MOV XAR0, XAR2
         MOV XAR0, XAR4
         MOV *sp(data_ptr), AR2		; ar2 = #fftdata  
         MOV AR2, AR4               ; ar4 = #fftdata  
         MOV AR2, AR0
         AADD T0, AR0               ; ar0 = fftdata + fft_size
         MOV XAR0, XAR5				; ar5 = fftdata + fft_size
         MOV T0, T2
         SFTS T2, #-1               ; T2 = fft_size/2 
         MOV T2, AR7
         SUB #2, AR7
         MOV AR7, BRC0
        
        
        
                 ; ac2=Qr[0]:Qi[0] 
        MOV dbl(*(ar0+t1)), ac2      
                
                ; ac0 = Pr[0]+Qr[0]:Pi[0]+Qi[0]
                ;     = Pr'[0]:Pi'[0]
           
       ADD	dual(*AR2), AC2, AC0 
   

        
                ; ac1 = Pr[0]-Qr[0]:Pi[0]-Qi[0]
                ;     = Qr'[0]:Qi'[0]
                ; store P'[0]
       SUB	AC2, dual(*(AR2+T1)), AC1    
      || MOV ac0, dbl(*(ar4+t1))               
                                       
                 
        RPTBLOCAL r2_loop00 -1          


        MOV dbl(*(ar0+t1)), ac2         	; load Q            
        || MOV ac1, dbl(*(ar5+t1))     ; store new Q   
        
           
         ADD	dual(*AR2), AC2, AC0        ; new P   
            
          SUB	AC2, dual(*(AR2+T1)), AC1   ; new Q 
      || MOV ac0, dbl(*(ar4+t1))       ; store new P  

r2_loop00:	
		 
                    
         MOV ac1, dbl(*(ar5+t1))     ; store new Q   

;***********************************************************************
	CALL scale_exp	; find maximum absolute value from previous stage
						; scale by 2^(exponent of max value - 2)
;***********************************************************************

; radix-2 stage 2
;
; group #1 twiddle = #1  


         MOV *sp(data_ptr), AR2		; ar2 = #fftdata  
         MOV AR2, AR4               ; ar4 = #fftdata  
         MOV AR2, AR0
         ADD T2, AR0                ; ar0 = fftdata + fft_size/2
         MOV AR0, AR5				; ar5 = fftdata + fft_size/2  
         MOV T2, T3                 ; T3= fft_size/2
         SFTS T2, #-1               ; T2 = fft_size/4 
         SUB #2, T2
         MOV T2, BRC0               ; BRC0 = fft_size/4 - 2
       
                                	
 
                   ; ac2=Qr[0]:Qi[0] 
        MOV dbl(*(ar0+t1)), ac2       
                
                ; ac0 = Pr[0]+Qr[0]:Pi[0]+Qi[0]
                ;     = Pr'[0]:Pi'[0]
                ; cdp = #twiddle
       ADD	dual(*AR2), AC2, AC0 
   

        
                ; ac1 = Pr[0]-Qr[0]:Pi[0]-Qi[0]
                ;     = Qr'[0]:Qi'[0]
                ; store P'[0]
       SUB	AC2, dual(*(AR2+T1)), AC1    
      || MOV ac0, dbl(*(ar4+t1))               
                                       
                 
        RPTBLOCAL r2_loop01 -1          


        MOV dbl(*(ar0+t1)), ac2         	; load Q            
        || MOV ac1, dbl(*(ar5+t1))     ; store new Q   
        
           
         ADD	dual(*AR2), AC2, AC0        ; new P   
 
       SUB	AC2, dual(*(AR2+T1)), AC1   ; new Q 
      || MOV ac0, dbl(*(ar4+t1))       ; store new P  

r2_loop01:	
		 
                    
         MOV ac1, dbl(*(ar5+t1))     ; store new Q    
                 
 
; radix-2 stage 2
;
; group #2 twiddle = #-1    


         MOV *sp(data_ptr), AR2		; ar2 = #fftdata  
         ADD T0, AR2				; ar2 = #fftdata + fft_size
         MOV AR2, AR4               ; ar4 = #fftdata + fft_size  
         MOV AR2, AR0
         ADD T3,  AR0               ; ar0 = #fftdata + 3/2*fft_size 
         MOV AR0, AR1
         ADD #1, AR1
         MOV AR0, AR5				; ar5 = #fftdata + 3/2*fft_size
         MOV T2, BRC0               ; ; BRC0 = fft_size/4 - 2                           	
         AMOV #twiddle, xar3		; 
         ADD #2, AR3    			; 
         MOV XAR3, XCDP				; 

                      
                    ; ac2=Qr[n]*Wr                                    
                    ; ac3=Qi[n]*Wr
          MPY	*AR0, *CDP+, AC2               
        ::MPY	*AR1, *CDP+, AC3  
       
                   ; ac3=Qi[n]*Wr+Qr[n]*Wi
                    ; ac2=Qr[n]*Wr-Qi[n]*Wi

         
          MASR	*(AR0+T1), *CDP-, AC3
         ::MACR	*(AR1+T1), *CDP-, AC2 
        
                    ; ac2=(Qr[n]*Wr-Qi[n]*Wi):(Qi[n]*Wr+Qr[n]*Wi)

         
         OR AC3 << #-16, AC2
         
                    ; hi(ac0)=Pr[n]+(Qr[n]*Wr-Qi[n]*Wi)=Pr'[n]
                    ; lo(ac0)=Pi[n]+(Qr[n]*Wi+Qi[n]*Wr)=Pi'[n]
                             
         	   ADD	dual(*AR2), AC2, AC0            
   
                       ; hi(ac1)=Pr[n]-(Qr[n]*Wr-Qi[n]*Wi)=Qr'[n] 
                    ; lo(ac1)=Pi[n]-(Qr[n]*Wi+Qi[n]*Wr)=Qi'[n]
                    ; store Pr'[n]:Pi'[n]

    	SUB	AC2, dual(*(AR2+T1)), AC1
		|| MOV ac0, dbl(*(ar4+t1)) 
   
                                                          
        
          RPTBLOCAL r2_loop02          


                    ; ac2=Qr[n]*Wr                                    
                    ; ac3=Qi[n]*Wr
          MPY	*AR0, *CDP+, AC2               
        ::MPY	*AR1, *CDP+, AC3                           
                
        
                    ; ac3=Qi[n]*Wr+Qr[n]*Wi
                    ; ac2=Qr[n]*Wr-Qi[n]*Wi  --correct commenting!

          
        MASR	*(AR0+T1), *CDP-, AC3   
        :: MACR	*(AR1+T1), *CDP-, AC2  
            
                    ; ac2=(Qr[n]*Wr-Qi[n]*Wi):(Qi[n]*Wr+Qr[n]*Wi)

         
          OR AC3 << #-16, AC2     
                    ; hi(ac0)=Pr[n]+(Qr[n]*Wr-Qi[n]*Wi)=Pr'[n]
                    ; lo(ac0)=Pi[n]+(Qr[n]*Wi+Qi[n]*Wr)=Pi'[n]
                    ; store Qr'[n-1]:Qi'[n-1]

	   ADD	dual(*AR2), AC2, AC0
	   ||  MOV ac1, dbl(*(ar5+t1)) 
    
    
                    ; hi(ac1)=Pr[n]-(Qr[n]*Wr-Qi[n]*Wi)=Qr'[n] 
                    ; lo(ac1)=Pi[n]-(Qr[n]*Wi+Qi[n]*Wr)=Qi'[n]
                    ; store Pr'[n]:Pi'[n]

r2_loop02:	SUB	AC2, dual(*(AR2+T1)), AC1
		|| MOV ac0, dbl(*(ar4+t1)) 
        
                                   ; end of butterfly loop
                              
          MOV ac1, dbl(*(ar5+t1))  
            
          MOV *sp(data_sz), T0		  
		  SUB #8, T0		  
		  BCC final_stage, T0==#0  	; branch to final stage for 8-point fft

 
;-----------------------------------------------------------------------
; radix-2 stages (stages 3->log2(FFT_SIZE) )
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -