📄 example 3-11.asm

📁 《基于TI DSP的通用算法实现》程序代码
💻 ASM
📖 第 1 页 / 共 3 页
字号:
	ssbx OVM 			; 1 cycle, MUST turn overflow mode on.
	stm #0040h, AR_X
	stl a, *AR_X
	ld *AR_X,16,a
					;Acc A contains the MaxAbs value...so just start performing operation
	exp a 				; 1 cycle, delay – slot
	nop 				; 1 cycle
	nop 				; 1 cycle
	norm a 				; 1 cycle
	st t,*sp(SP_TEMP) 		; store exponent computed by EXP instruction earlier
	ld #InvYeTable,b 		; 2 cycles
	add *sp(SP_TEMP),b 		; 1 cycle
	stl b,*(AR_TABLE) 		; 1 cycle
	sth a,*sp(SP_XNORM) 		; 1 cycle, AR2 points to appropriate Ye value in table.
	sfta a,–1			; 1 cycle, Estimate the first Ym value.
	xor #01FFFh,16,a 		; 2 cycles
	sth a,*AR_Z 			; store result in auxiliary register
		;––––––––––––––––––––––––––––––––––––––––––––––––
		; First two iterations:
		;––––––––––––––––––––––––––––––––––––––––––––––––
   .loop 2
	ld *AR_Z,15,a 			; 2 cycles, Calculate Ym = 2*Ym – Ym^2*X
	ld *AR_Z,t 			; 1 cycle
	mpy *sp(SP_XNORM),b 		; 1 cycle
	sth b,1,*AR_Z 			; 2 cycles
	mpy *AR_Z,b 			; 1 cycle
	sub b,1,a 			; 1 cycle
	sth a,2,*AR_Z 			; 2 cycles
   .endloop

		;––––––––––––––––––––––––––––––––––––––––––––––––––––
		; Final iteration: – this code is same as above loop, except
		; last instruction omitted
		;––––––––––––––––––––––––––––––––––––––––––––––––––––
	ld *AR_Z,15,a 			; 2 cycles, Calculate Ym = 2*Ym – Ym^2*X
	ld *AR_Z,t 			; 1 cycle
	mpy *sp(SP_XNORM),b 		; 1 cycle
	sth b,1,*AR_Z 			; 2 cycles
	mpy *AR_Z,b 			; 1 cycle
	sub b,1,a 			; 1 cycle
	st #07000h,*AR_Z 		; 2 cycles, Make sure that 8000h <= Ym < 7FFFh
	add *AR_Z,16,a 			; 1 cycle
	sub *AR_Z,16,a 			; 1 cycle
	sub *AR_Z,16,a 			; 1 cycle
	add *AR_Z,16,a 			; 1 cycle
	sth a,3,*AR_Z 			; 2 cycles
	ld *AR_TABLE, t 		; setup for MPY
	bc div1207, ntc 		; if TC=0, then divide by 1.207

; div4 - reached by fall-through when the preceding "bc div1207, ntc" is
; not taken (TC = 1). Scales the value held at *AR_Z by 1/4, then jumps
; straight to "multiply" so the div1207 scaling is NOT applied as well:
; the two scalings are alternatives, not consecutive steps.
div4:
	ld *AR_Z, a 			; A = r (value currently stored at *AR_Z)
	sfta a, -2 			; arithmetic shift right by 2: A = r/4
	stl a, *AR_Z 			; low word of A back to *AR_Z: r/4 available there
	b multiply			; skip div1207; T still holds the word read from
					; *AR_TABLE before the bc above
					;ld *AR_TABLE,a ; 1 cycle, Read exponent value from table.
					;stl a,*AR_ZEXP ; 1 cycle
div1207:
					; Branch target of "bc div1207, ntc" (TC = 0).
					; Multiplies the value at *AR_Z by the constant
					; cmprval_2, then reloads T from *AR_TABLE for the
					; final multiply.
	stm #0500h, ar5			; AR5 -> data address 0500h, used here as a scratch word
	st #cmprval_2, *ar5		; write the constant cmprval_2 to that scratch word
	ld *ar5, t 			; load 0.8284 into Treg (value per original comment;
					; cmprval_2 is defined outside this view)
	mpy *AR_Z, a 			; r * 0.8284
	ld *AR_TABLE, t 		; re-enter exponent
	stl a, *AR_Z			 ; restore magnitude (r * 0.8284) to AR_Z (low word of A)
	nop				; NOTE(review): presumably a pipeline spacer before the
					; MPY at "multiply" - confirm
multiply:
					; Common tail of the reciprocal routine: final
					; multiply by T, status-bit cleanup, register
					; restore and return.
	MPY *AR_Z, a 			; = {(r/4)*rexp} OR {(r * 0.8284)*rexp}
	rsbx ovm			; clear overflow (saturation) mode
	rsbx frct			; clear fractional mode
	rsbx tc				; clear the test/control flag
	popm ar5			; restore AR5..AR1; the matching pushes are
	popm ar4			; presumably earlier in the routine, outside
	popm ar3			; this view - confirm
	popm ar2
	popm ar1
end_reciprocal:
	ret				; non-delayed return; result is left in A
********************* End of “RECIPROCAL” routine ********************
 
					; Return
					;––––––––
	end_lab				;	
	frame +2
	popm ar7
	popm ar6
	popm ar1
   .if __far_mode
	fretd
   .else
	retd
   .endif

	rsbx frct
	rsbx ovm
	.def InvYeTable
	.data
; InvYeTable - 15 one-word entries holding the powers of two 2^1 .. 2^15.
; The reciprocal code forms an address as (#InvYeTable + the exponent it
; saved from T) and later loads T from that address.
InvYeTable:
	.word 0002h ; Ye = 2^1
	.word 0004h ; Ye = 2^2
	.word 0008h ; Ye = 2^3
	.word 0010h ; Ye = 2^4
	.word 0020h ; Ye = 2^5
	.word 0040h ; Ye = 2^6
	.word 0080h ; Ye = 2^7
	.word 0100h ; Ye = 2^8
	.word 0200h ; Ye = 2^9
	.word 0400h ; Ye = 2^10
	.word 0800h ; Ye = 2^11
	.word 1000h ; Ye = 2^12
	.word 2000h ; Ye = 2^13
	.word 4000h ; Ye = 2^14
	.word 8000h ; Ye = 2^15
	;end of file. please do not remove. it is left here to ensure that no lines of code are removed by any editor


A.2 Cbrev.asm
;*********************************************************************
; Function: cbrev
; Description: complex bit–reverse routine (C54x)
; Version: 1.00
;
; Copyright Texas instruments Inc, 1998
;––––––––––––––––––––––––––––––––––
; Revision History:
; 1.00 R. Piedra, 8/31/98. Original release.
;*********************************************************************

	.mmregs				; enable the memory-mapped register names (AR0, BRC, ...)

; "offset" shifts the stack-argument slots by one word in far mode.
; The symbol must sit in the label field (column 1) for .set to define it.
   .if __far_mode
offset	.set 1
   .else
offset	.set 0
   .endif
					; stack description
	.asg (0), ret_addr		; *sp(0): return address
					; x in A
	.asg (1+ offset), arg_y		; *sp(arg_y): destination pointer y
	.asg (2+ offset), arg_n		; *sp(arg_n): element count n
					; register usage
					; ar0 : bit reversing idx
	.asg ar2,ar_dst			; AR2 walks the destination array
	.asg ar3,ar_src			; AR3 walks the source array
	.global _cbrev

	.text

;---------------------------------------------------------------------
; _cbrev - complex bit-reverse of n (Re,Im) word pairs (C54x)
;
; In:     A          = source address x        (copied to ar_src)
;         *sp(arg_y) = destination address y   (copied to ar_dst)
;         *sp(arg_n) = n                       (copied to AR0, the index
;                                               used by the +0B modifier)
; Paths:  off_place - x != y: copy pairs from bit-reversed source
;                     positions to consecutive destination positions.
;         in_place  - x == y: swap each pair with its bit-reversed
;                     partner, only when src_addr < dst_addr so that no
;                     pair is swapped twice.
; Uses:   A, B, AR0, ar_src, ar_dst, BRC.
; Status: sets FRCT and SXM on entry; clears FRCT and OVM in the delay
;         slots of the return. SXM is left set.
;---------------------------------------------------------------------
_cbrev
	ssbx frct 			; fractional mode is on (1)
	ssbx sxm 			; sign-extension mode is on (1)
					; Get arguments
					; -------------
	stlm a, ar_src 			; pointer to src (1)
	mvdk *sp(arg_y), *(ar_dst) 	; pointer to dst (temporary) (2)
	ld *sp(arg_n), a 		; a = n (1)
	stlm a, AR0 			; AR0 = n = 1/2 size of circ buffer (1)
	sub #3,a 			; a = n-3 (bypass 1st and last elem) (2)
					; Select in-place or off-place bit-reversing
					; ------------------------------------------
	ldm ar_src,b 			; b = src_addr (1)
	sub *sp(arg_y),b 		; b = src_addr - dst_addr (1)
	bcd in_place, beq 		; if (ar_src==ar_dst) then in_place (2)
	stlm a, brc 			; delay slot: brc = n-3 (1)
	nop 				; delay slot (1)
					; Off-place bit-reversing
					; -----------------------
off_place:
_start1:
					; unroll to fill delayed slots
	rptbd off_place_end-1 		; repeat block brc+1 = n-2 times (2)
	mvdd *ar_src+,*ar_dst+ 		; delay slot: move first real component (1)
	mvdd *ar_src-,*ar_dst+ 		; delay slot: move first Im component;
					; src back on Re (1)
	mar *ar_src+0B 			; next src pair in bit-reversed order (1)
	mvdd *ar_src+,*ar_dst+ 		; move real component (1)
	mvdd *ar_src-,*ar_dst+ 		; move Im component (1)
off_place_end:
	mar *ar_src+0B 			; step to the last src pair (1)
	bd end 				; delayed branch to the return sequence (2)
	mvdd *ar_src+,*ar_dst+ 		; delay slot: move real component (1)
	mvdd *ar_src-,*ar_dst+ 		; delay slot: move Im component (1)
					; In-place bit-reversing
					; ----------------------
in_place:
	mar *ar_src+0B 			; bypass first and last element (1)
	mar *+ar_dst(2) 		; dst -> second pair (1)
_start2:
	rptbd in_place_end-1 		; repeat block brc+1 = n-2 times (2)
	ldm ar_src,a 			; delay slot: a = src_addr (1)
	ldm ar_dst, b 			; delay slot: b = dst_addr (1)
	sub b,a 			; a = src_addr - dst_addr (1)
					; if >=0 bypass move just increment
	bcd bypass, ageq 		; if (src_addr>=dst_addr) then skip (2)
	ld *ar_dst+, a 			; delay slot: a = Re dst element (preserve) (1)
	ld *ar_dst-, b 			; delay slot: b = Im dst element (preserve) (1)
	mvdd *ar_src+, *ar_dst+ 	; Re dst = Re src (1)
	mvdd *ar_src , *ar_dst- 	; Im dst = Im src; dst points to Re (1)
	stl b, *ar_src- 		; Im src = b = Im dst; src points to Re (1)
	stl a, *ar_src 			; Re src = a = Re dst (1)
bypass
	mar *ar_src+0B 			; next src pair in bit-reversed order (1)
	mar *+ar_dst(2) 		; next dst pair in linear order (1)
	ldm ar_src,a 			; a = src_addr for the next pass (1)
	ldm ar_dst, b 			; b = dst_addr for the next pass (1)

in_place_end
					; Return
					; ------
_end:
end:
   .if __far_mode
	fretd				; delayed far return
   .else
	retd				; delayed near return
   .endif

	rsbx frct			; delay slot: fractional mode off
	rsbx ovm			; delay slot: overflow mode off
	;end of file. please do not remove. it is left here to ensure that no lines of code are removed by any editor


A.3 Macros.asm
;*********************************************************************
; Filename: macros.asm
; Version : 1.00
; Description: collections of macros for cfft
;––––––––––––––––––––––––––––––––––
; Description: Contains the following macros
;––––––––––––––––––––––––––––––––––
; Revision History:
;
; 0.00 M. Christ/M. Chishtie. Original code
; 1.00 R. Piedra, 8/31/98
; - Modified stage3 macro to correct functional problem
; - Modified order of xmem, ymem operands in butterfly code
; to reduce number of cycles from 10 to 8
;
;*********************************************************************
;
;Variation from macros.asm in fft_approach2.mak. Here the
;auto scaling has been disabled in:
; stage3, stdmacro and laststag
;
;*********************************************************************
.mmregs
;*********************************************************************
; macro : combo5xx
;
; The COMBO5xx macro implements a bit-reversal stage and the first two FFT
; stages (radix-4 implementation). Bit reversal is now done in the same
; loop, thereby saving cycles. Circular addressing is used to access the
; INPUT buffer and bit-reversed addressing is used to implement the DATA
; buffer. Therefore the INPUT buffer must now be aligned at a 4*N boundary
; and the DATA buffer at a 2*N boundary.
; (MCHI)
;–––––––––––––––––––––––––––––––––––

combo5xx .macro ; REPEAT MACRO ‘combo5xx’: N/4 times
	; .global STAGE1,COMBO1,COMBO2,end1,end2,end?
	*
	* R1 := [(R1+R2)+(R3+R4)]/4 INPUT OUTPUT
	*
	* R2 := [(R1–R2)+(I3–I4)]/4 –––––––––––––––––– ––––––––––––––––––
	*
	* R3 := [(R1+R2)–(R3+R4)]/4 AR0 = 7
	*
	* R4 := [(R1–R2)–(I3–I4)]/4 AR1 –> R1,I1 AR1 – > R5,I5
	*
	* I1 := [(I1+I2)+(I3+I4)]/4 AR2 –> R2,I2 AR2 – > R6,I6
	*
	* I2 := [(I1–I2)–(R3–R4)]/4 ARP–> AR3 –> R3,I3 ARP – > AR3 – > R7,I7
	*


	* I3 := [(I1+I2)–(I3+I4)]/4 AR4 –> R4,I4 AR4 – > R8,I8
	*
	* I4 := [(I1–I2)+(R3–R4)]/4
	*
	;
STAGE1:
	mvdk *sp(DATA),ar2			; (RMP) pointer to DATA r1,i1
	mvdk *sp(DATA),ar3
	mvmm ar3,ar4
	mvmm ar3,ar5
	mar *+ar3(2) 				; pointer to DATA + 2 r2,i2
	mar *+ar4(4) 				; pointer to DATA + 4 r3,i3
	mar *+ar5(6) 				; pointer to DATA + 6 r4,i4
	ld *sp(scale), a
	bcd COMBO2, AEQ
	ld #0,ASM 				; ASM=0
	nop
	ld #–2, ASM
   .if N>4
	stm #7,ar0 				; index
	stm #0,BK 				; blocksize to zero!
	stm #N/4–1,BRC				; execute N/4–1 times ‘combo5xx’
	rptb end1
   .endif
						; AR2 AR3 AR4 AR5
						; ; ––– ––– ––– –––
COMBO1 	sub *ar2,*ar3,B 			; B := (R1–R2) R1 R2 R3 R4
	add *ar2,*ar3,A 			; A := (R1+R2) R1 R2 R3 R4
	sth B,ASM,*ar3 				; R2’:= (R1–R2)/4 R1 R2 R3 R4
	add *ar4,*ar5,B 			; B := (R3+R4) R1 R2 R3 R4
	add B,A 				; A := (R1+R2) + (R3+R4)
						; R1 R2 R3 R4
	sth A,ASM,*ar2+ 			; R1’:=((R1+R2) + (R3+R4))/4
						; I1 R2 R3 R4
	sub B,1,A 				; B :=((R1+R2) – (R3+R4))
						; I1 R2 R3 R4
	sub *ar4,*ar5,B 			; B := (R3–R4)
						; I1 R2 R3 R4
	st A,*ar4+ ;ASM 			; R3’:=((R1+R2) – (R3+R4))/4
						; I1 R2 I3 R4
	|| ld *ar3,A ; 16 			; A := (R1–R2)/4 I1 R2 I3 R4
	sth B,ASM,*ar5+ 			; R4’:= (R3–R4)/4 I1 R2 I3 I4
	sub *ar4,*ar5–,B 			; B := (I3–I4) I1 R2 I3 R4
	add B,ASM,A 				; A := (R1–R2) + (I3 –I4)/4
						; I1 R2 I3 R4
	sth A,*ar3+ 				; R2’:= (R1–R2) + (I3 –I4)/4
						; I1 I2 I3 R4
	sub B,–1,A 				; A :=((R1–R2) – (I3–I4))
						; I1 I2 I3 R4
	
	
	ld *ar5,16,B 				; B=R3–R4
	sth A,*ar5+ 				; R4’:=((R1–R2) – (I3–I4))/4
						; I1 I2 I3 I4
	add *ar4,*ar5,A 			; A := (I3+I4) I1 I2 I3 I4
	sth A,ASM,*ar4 				; I3’:= (I3+I4)/4 I1 I2 I3 I4
	sub *ar2,*ar3,A 			; A := (I1–I2) I1 I2 I3 I4
	add B,2,A				; A := (I1–I2)+ (r3–r4)
						; I1 I2 I3 I4
	sth A,ASM,*ar5+0 			; I4’:= (I1–I2)+ (r3–r4)/4
						; I1 I2 I3 R4’
	sub B,3,A 				; A := (I1–I2)– (r3–r4)
						; I1 I2 I3 R4’
	add *ar2,*ar3,B 			; B := (I1+I2) I1 I2 I3 R4’
	st A,*ar3+0% 				;asm; I2’:= (I1–I2)–(R3–R4)/4
						; I1 R2’ I3 R4’
	|| ld *ar4,A 				;16 ; A := (I3+I4)/4 I1 R2’ I3 R4’
	add A,2,B 				; B := (I1+I2)+(I3+I4)
						; I1 R2’ I3 R4’
	sth B,ASM,*ar2+0 			; I1’:= (I1+I2)+(I3+I4)/4
						; R1’ R2’ I3 R4’
	sub A,3,B 				; B := (I1+I2)–(I3+I4)/4
						; R1’ R2’ I3 R4’
end1 	sth B,ASM,*ar4+0 			; I3’:= (I1+I2)–(I3+I4)/4
						; R1’ R2’ R3’ R4’
bend?

COMBO2
   .if N>4
	stm #7,ar0 				; index
	stm #0,BK 				; blocksize to zero!
	stm #N/4–1,BRC 				; execute N/4–1 times ’combo5xx’
	rptb end2 ;
   .endif
						; AR2 AR3 AR4 AR5
						; ; ––– ––– ––– –––
	sub *ar2,*ar3,B 			; B := (R1–R2) R1 R2 R3 R4
	add *ar2,*ar3,A 			; A := (R1+R2) R1 R2 R3 R4
	sth B,ASM,*ar3 				; R2’:= (R1–R2) R1 R2 R3 R4
	add *ar4,*ar5,B 			; B := (R3+R4) R1 R2 R3 R4
	add B,A 				; A := (R1+R2) + (R3+R4)
						; R1 R2 R3 R4
	sth A,ASM,*ar2+ 			; R1’:= (R1+R2) + (R3+R4)
						; I1 R2 R3 R4
	sub B,1,A 				; A := (R1+R2) – (R3+R4)
						; I1 R2 R3 R4
	sub *ar4,*ar5,B 			; B := (R3–R4) I1 R2 R3 R4
	st A,*ar4+ ;ASM 			; R3’:= (R1+R2) – (R3+R4)
💿 文件大小 223.37K
👤 上传用户 yueyan51
📂 所属分类 DSP
🏷️ 相关标签

#DSP #算法 #程序 #代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -