📄 example 3-18.asm

📁 《基于TI DSP的通用算法实现》程序代码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
上一页 12
        ; main initialization
        ; --------------------
       ADD #1, *sp(bit_buf)			; define next stage bit-growth
        
       MOV *sp(data_ptr), AR0		; ar0 = #fftdata
       ;MOV  #twiddle, AR3 
            ; 1 - circular buffer initializations

        
            ; modify ST2 to select linear or circular addressing modes
       OR	#0x57 , mmap(ST2_55)  ; circular ar0, ar1, ar2, ar4 and ar6
         

            ; circular buffer starting addresses
        ADD #2, ar0,ar1
        MOV mmap(@ar0),bsa23       ; P  leg
        MOV mmap(@ar0),bsa45       ; P' leg    (in-place butterflies)
        MOV mmap(@ar1),bsa01       ; Q  leg
        MOV mmap(@ar1),bsa67       ; Q' leg    (in-place butterflies)
                            
            ; circular buffer sizes 
	  MOV *sp(data_sz), T0
        MOV T0, T1		; T1 =  #FFT_SIZE
	  SFTS T0, #1        
        SUB #2, T0
    
        MOV mmap(@T0_L), bk03          	; bk03 = (2*FFT_SIZE-2), 
        MOV mmap(@T0_L), bk47   ; bk47 = (2*FFT_SIZE-2), 

            ; 2 - register initializations
        
       MOV *sp(data_sz), T0
       SFTS T0, #-1
       SFTS T0, #-1
        
        MOV T0, ar5   ; 2*number of groups for stage 3    
        MOV #2, ar7          ; 1/2 * number of butterflies per group for stage 3
       

        ;AMOV #twiddle, xar3        ; not modified during iteration of the stages
        MOV xar0,xar1            ; data page = data page of indata
        MOV xar0,xar2            ; data page = data page of indata
        MOV xar0,xar4   ; data page = data page of indata (in-place butterflies)
        MOV xar0,xar6   ; data page = data page of indata (in-place butterflies)
        MOV xar3,xcdp      ; data page = data page of twiddles
        
        
stage:    ; begining of a new stage

        ; stage initialization
        ; --------------------
              
;***********************************************************************
	CALL scale_exp	; find maximum absolute value from previous stage
						; scale by 2^(exponent of max value - 2)
;***********************************************************************

            ; 1 - loop counter updates
        
            ; butterfly counter update
       SFTS ar7, #1
    ||    MOV #0, ar4 
        SUB #2,ar7,ar0
        MOV ar0, brc1        ;1 butterflies executed out of the loop

            ; group counter update
        SFTS ar5, #-1
        SUB  #1,ar5,ar0     
        MOV ar0, brc0         

            ; stage "counter" update (t0=0 <=> last stage)
         AMOV ar0, t0

            ; 2 - pointer modifier update
                            
            ; t1 is the pointer modifier                    
    ||   SFTS  t1,#-1        

            ; 3 - index updates                  
            ; ar0 -> Qr[0]
            ; ar1 -> Qi[0]
            ; ar2 -> Pr[0]
            ; ar4 -> Pr'[0]  (performed above to use parallelism)
            ; ar6 -> Qr'[0]

 
        SFTS ar0, #1
        ADD #1, ar0,ar1
        MOV #0, ar2
    ||  MOV ar0, ar6 
        
        
        
group:  ; begining of a new group
        

        ; note:
        ; - circular buffers are used for data pointers so that no
        ;   initialization of data pointers is required before a new
        ;   iteration of a group
        ; - twiddle pointer (cdp) is updated inside the loop
        
                
        RPTBLOCAL r2_loop1

        
                ; ac2=Qr[0]:Qi[0] 
        MOV dbl(*(ar0+t1)), ac2
    ||  ADD  t1,ar1       
                
                ; ac0 = Pr[0]+Qr[0]:Pi[0]+Qi[0]
                ;     = Pr'[0]:Pr'[0]
                ; cdp = #twiddle
       ADD	dual(*AR2), AC2, AC0 
      ||    MOV ar3, cdp

        
                ; ac1 = Pr[0]-Qr[0]:Pi[0]-Qi[0]
                ;     = Qr'[0]:Qi'[0]
                ; store P'[0]
       SUB	AC2, dual(*(AR2+T1)), AC1    
      || MOV ac0, dbl(*(ar4+t1))               
                                                          
        ; --------------------------------------------------------------        
        ; DIT radix-2 butterfly
        ;
        ; Register usage:
        ; 
        ;     address registers
        ; ar0 = Qr
        ; ar1 = Qi
        ; ar2 = Pr:Pi
        ; ar4 = Pr':Pi'
        ; ar6 = Qr':Qi'
        ; cdp = Wr:Wi
        ;
        ;     temporary registers
        ; t1 pointer modifier (jump between two consecutive legs)
        ; t2, t3 - both modified
        ;
        ;     accu registers
        ; ac0, ac1, ac2, ac3 are all modified
        ; 
        ;
        ; Comments:
        ;
        ; This butterfly supports both in-place and out-if-place
        ; implementations.
        ;
        ; input data assumed to be in order (bit-reversion after fft)
        ;
        ; twiddle table
        ;   - assumed to be in bit-reversed order
        ;     - the twiddle factors stored in the tables are the pairs
        ;      {cos(2*pi*k/N) , sin(2*pi*k/N)} = {Wr[k] , -Wi[k]}
        ;   - first twiddle pair {1,0} is suppressed (since the first
        ;       butterfly is computed out of the inner loop)
        ;   - second twiddle pair {0,1} replaced by {0,-1} (-1 can be
        ;     represented with full precision but not 1)
        ;   - when inner loop starts, cdp should point to the
        ;     third twiddle pair {1/sqrt(2),1/sqrt(2)}
        ;
        ; pointer registers
        ;   ar0 -> Qr[]
        ;   ar1 -> Qi[]
        ;   ar2 -> Pr[]
        ;   ar4 -> Pr'[]
        ;   ar6 -> Qr'[]
        ;   cdp -> W[] (bit-reversed twiddle table)
        ;
        ; --------------------------------------------------------------        
        
        
          RPTBLOCAL r2_loop2          


                    ; ac2=Qr[n]*Wr                                    
                    ; ac3=Qi[n]*Wr
          MPY	*AR0, *CDP+, AC2               
        ::MPY	*AR1, *CDP+, AC3                           
                
        
                    ; ac3=Qi[n]*Wr+Qr[n]*Wi
                    ; ac2=Qr[n]*Wr-Qi[n]*Wi

          
         MASR	*(AR0+T1), *CDP+, AC3   
         ::MACR	*(AR1+T1), *CDP+, AC2   
            
                    ; ac2=(Qr[n]*Wr-Qi[n]*Wi):(Qi[n]*Wr+Qr[n]*Wi)

         
          OR AC3 << #-16, AC2       
                    ; hi(ac0)=Pr[n]+(Qr[n]*Wr-Qi[n]*Wi)=Pr'[n]
                    ; lo(ac0)=Pi[n]+(Qr[n]*Wi+Qi[n]*Wr)=Pi'[n]
                    ; store Qr'[n-1]:Qi'[n-1]

	   ADD	dual(*AR2), AC2, AC0
	   ||  MOV ac1, dbl(*(ar6+t1)) 
    
    
                    ; hi(ac1)=Pr[n]-(Qr[n]*Wr-Qi[n]*Wi)=Qr'[n] 
                    ; lo(ac1)=Pi[n]-(Qr[n]*Wi+Qi[n]*Wr)=Qi'[n]
                    ; store Pr'[n]:Pi'[n]

r2_loop2:	SUB	AC2, dual(*(AR2+T1)), AC1
		|| MOV ac0, dbl(*(ar4+t1)) 
        
                                   ; end of butterfly loop
        
r2_loop1:        MOV AC1, dbl(*(ar6+t1))      ; store last Qr':Qi'
        
                                   ; end of group loop

        SUB #1, T0
        
        BCC stage,t0!=#0       ; end of stage loop 
   	   
;***********************************************************************
	CALL scale_exp	; find maximum absolute value from previous stage
						; scale by 2^(exponent of max value - 2)
;***********************************************************************

; last r2 stage
; no scaling performed
;
; modify ST2 to select linear or circular addressing modes
final_stage:   AND	#0xF000 , mmap(ST2_55)  ; circular ar0, ar1, ar2, ar4 and ar6
      
         MOV #2, T1  
         MOV *sp(data_sz), T0       ; t0 = #fftsize
         MOV *sp(data_ptr), AR2		; ar2 = #fftdata  
         MOV AR2, AR4               ; ar4 = #fftdata  
         MOV AR2, AR0
         AADD T1, AR0               ; ar0 = fftdata + 2 
         MOV #7, AR1
         ADD AR2, AR1               ; 
         MOV AR0, AR5				; ar5 = fftdata + 2 
         SFTS T0, #-1  
         SUB #2, T0
         MOV T0, BRC0               ; brc0 = fftsize/2-2 
         MOV #4, T1
         MOV ar3, cdp             
                      
              ; ac2=Qr[0]:Qi[0] 
        MOV dbl(*(ar0+t1)), ac2
       
                
                ; ac0 = Pr[0]+Qr[0]:Pi[0]+Qi[0]
                ;     = Pr'[0]:Pr'[0]
                ; cdp = #twiddle
       ADD	dual(*AR2), AC2, AC0 

        
                ; ac1 = Pr[0]-Qr[0]:Pi[0]-Qi[0]
                ;     = Qr'[0]:Qi'[0]
                ; store P'[0]
       SUB	AC2, dual(*(AR2+T1)), AC1    
      || MOV ac0, dbl(*(ar4+t1))               
                                                          
        
          RPTBLOCAL r2_loop3          


                    ; ac2=Qr[n]*Wr                                    
                    ; ac3=Qi[n]*Wr
          MPY	*AR0, *CDP+, AC2               
        ::MPY	*AR1, *CDP+, AC3                           
                
        
                    ; ac3=Qi[n]*Wr+Qr[n]*Wi
                    ; ac2=Qr[n]*Wr-Qi[n]*Wi

        MASR	*(AR0+T1), *CDP+, AC3  
        ::MACR	*(AR1+T1), *CDP+, AC2   
            
            
                    ; ac2=(Qr[n]*Wr-Qi[n]*Wi):(Qi[n]*Wr+Qr[n]*Wi)

        
          OR AC3 << #-16, AC2      
                    ; hi(ac0)=Pr[n]+(Qr[n]*Wr-Qi[n]*Wi)=Pr'[n]
                    ; lo(ac0)=Pi[n]+(Qr[n]*Wi+Qi[n]*Wr)=Pi'[n]
                    ; store Qr'[n-1]:Qi'[n-1]

	   ADD	dual(*AR2), AC2, AC0
	   ||  MOV ac1, dbl(*(ar5+t1)) 
    
    
                    ; hi(ac1)=Pr[n]-(Qr[n]*Wr-Qi[n]*Wi)=Qr'[n] 
                    ; lo(ac1)=Pi[n]-(Qr[n]*Wi+Qi[n]*Wr)=Qi'[n]
                    ; store Pr'[n]:Pi'[n]

r2_loop3:	SUB	AC2, dual(*(AR2+T1)), AC1
		|| MOV ac0, dbl(*(ar4+t1)) 
        
                                   ; end of butterfly loop
                              
                      
        MOV AC1, dbl(*(ar5+t1))
      
              
end_benchmark:

	MOV *sp(ret_exp), T0		; return total number of shifts in AC0
;
; Allocate the local frame and argument block
;----------------------------------------------------------------
	AADD	#(ARG_BLK_SZ + FRAME_SZ + REG_SAVE_SZ), SP


;Context restore 



	POPBOTH XAR7
	POPBOTH XAR6
	POPBOTH XAR5
      POP T3
      POP T2 

      POP	mmap(ST2_55)
      POP	mmap(ST1_55)
      POP	mmap(ST0_55)

	RET 
	
;*******************************************************************************
; scale_exp:   Find maximum absolute value in array specified by 
; 		   	   address AR3 and length T0. Pre-scale the input to next
;			   radix-2 stage by 2^(exponent of the maximum absolute 
;			   value - 1) for first two stages and by 2^(exponent of 
;			   the maximum absolute value - 2) for all following stages
;*******************************************************************************
scale_exp:
	PSH dbl(AC0)				; save registers to be modified	(stack size += 12)
	PSH dbl(AC1)
	PSH dbl(AC2)
	PSH dbl(AC3)
	PSH AR3				
	PSH T0
	PSH T1

; find maximum absolute value (real and imaginary)
;-------------------------------------------------------------------------------
	MOV *sp(data_ptr + 12), AR3	; load array size into AR3 (sp offset computed from 
	MOV *sp(data_sz + 12), T0	; above pushed registers and the return address

	SUB #2, T0
    || MOV dbl(*AR3+), AC0		; AC0_H = x_Re[0],  AC0_L = x_Im[1]

    MOV T0, BRC0				; BRC0 = NX - 2
    || MOV dbl(*AR3), AC1 		; AC1_H = x_Re[2],  AC1_L = x_Im[3]

	MOV dbl(*AR3+), AC2			; AC2_H = x_Re[2],  AC1_L = x_Im[3]		
    || RPTBLOCAL find_max  
	
	MAXDIFF AC0, AC1, AC1, AC3	; AC1_H/L contain max's

find_max: MOV dbl(*AR3+), AC0	; AC0_H = x_Re[n],  AC0_L = x_Imn+1]
    || MINDIFF AC0, AC2, AC2, AC3; AC2_H/L contain min's
	
	MOV HI(AC1), AR3			; AR3 = even max value
	MOV AC1, T0					; T0  = odd  max value	   				
	MAX AR3, T0					
	
	MOV HI(AC2), T1				; T1  = odd  min value
	MOV AC2, AR3				; AR3 = even min value
	MIN AR3, T1
	ABS T1
	
	MAX T0, T1
	MOV T1, HI(AC0)				; put maximum absolute value in high part of AC0			
	
; scale array according to maximum asbolute value
;----------------------------------------------------------------------------	
	EXP AC0, T1					; determine exponent from maximum absolute value
	MOV *sp(bit_buf + 12), AR3	; AR3 contains expected bit growth
	SUB AR3, T1					; subtract 1 or 2 from the exponent to compensate 
								; for the maximum possible bit growth (factor of 
								; 2 or 4) in the next radix-2 stage	
	
	BCC no_scale, T1 == #0		; branch if no scaling needed
	ADD *sp(ret_exp + 12), T1, AC0	; sum current with previous exponent
	MOV *sp(data_sz + 12), T0   ; put nx in T0 for next loop
	MOV AC0, *sp(ret_exp + 12)	; put exponent back on stack
    || SUB #1, T0		

	MOV *sp(data_ptr + 12), AR3
    || MOV T0, BRC0				; initialize loop counter (BRC0 = nx - 1)
				
						
	RPTBLOCAL scale	 
	MOV dbl(*AR3), pair(LO(AC0))			
	MOV AC0<<T1, *AR3+			; scale by 2^(exponent of the maximum absolute 
scale: MOV AC1<<T1, *AR3+		; value - 2)

no_scale:	POP T1				; restore registers
	POP T0
	POP AR3
	POP dbl(AC3)
	POP dbl(AC2)
	POP dbl(AC1)
	POP dbl(AC0)
	
	RET		
;-------------------------------------------------------------------------------


	.end
上一页 12
💿 文件大小 223.37K
👤 上传用户 yueyan51
📂 所属分类 DSP
🏷️ 相关标签

#DSP #算法 #程序 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -