recip16.asm

来自「CCS3.3自带的TI 5400系列DSP的dsplib文件。文档说明可以在TI」· 汇编代码 · 共 346 行
ASM
346 行
;***********************************************************
; Version 2.20.01                                           
;***********************************************************
;****************************************************************
;  Filename:	recip16.asm
;  Description: calculates reciprocal of Q15 number
;----------------------------------------------------------------
;  Synopsis:
;
;  void recip16 (DATA *x, DATA *z, DATA *zexp, ushort n)
;                                                              
;  x[n]		: pointer to input/sampled data buffer
;
;  z[n]		: pointer to output data buffer
;
;  zexp[n]	: pointer to exponent buffer for output values
;
;  n   		: number of vector elements                
;
;----------------------------------------------------------------
;  Description:
;
;     This routine returns the fractional and exponential portion
;  of the reciprocal of a Q15 number. Since the reciprocal is always
;  greater than 1, it returns an exponent such that:
;          
;               z[i] * zexp[i] = reciprocal
;
;----------------------------------------------------------------
;  Algorithm:
;
;
;
; The most optimal method for calculating the inverse of a
; fractional number (Y=1/X) is to normalize the number first. 
; This limits the range of the number as follows:	
;
; 0.5 <= Xnorm <   1		
; -1  <= Xnorm <= -0.5			(1)
;
;		The resulting equation becomes:
;		Y = 1/(Xnorm*2^-n)
;
;	or	Y = 2^n/Xnorm				(2)
;
;		where n = 1,2,3,...,14,15 
;
;	Letting Ye = 2^n:
;
;		Ye = 2^n				(3)
;
;	Substituting (3) into equation (2):
;
;		Y = Ye * 1/Xnorm			(4)
;
;	Letting Ym = 1/Xnorm:
;
;		Ym = 1/Xnorm				(5)
;
;	Substituting (5) into equation (4):
;
;		Y = Ye * Ym				(6)
; 	
;	For the given range of Xnorm, the range of Ym is:
;
;		 1 <= Ym <   2
;		-2 <= Ym <= -1				(7)
;
;	To calculate the value of Ym, various options are possible:
;
;	(a) Taylor Series Expansion
;	(b) 2nd,3rd,4th,.. Order Polynomial (Line Of Best Fit)
;	(c) Successive Approximation
;
;	The method chosen in this example is (c). Successive 
;	approximation yields the most optimum code versus speed 
;	versus accuracy option. The method outlined below yilds an 
;	accuracy of 15 bits.
;
;	Assume Ym(new) = exact value of 1/Xnorm:
;		
;		Ym(new) = 1/Xnorm			(c1)
;		
;	or	Ym(new)*X = 1				(c2)
;
;	Assume Ym(old) = estimate of value 1/X:
;
;		Ym(old)*Xnorm = 1 + Dyx 		
;
;	or	Dxy = Ym(old)*Xnorm - 1			(c3)
;
;		where Dyx = error in calculation
;
;	Assume that Ym(new) and Ym(old) are related as follows:
;
;		Ym(new) = Ym(old) - Dy			(c4)
;
;		where Dy = difference in values 
;
;	Substituting (c2) and (c4) into (c3):
;		
;		Ym(old)*Xnorm = Ym(new)*Xnorm + Dxy
;
;		(Ym(new) + Dy)*Xnorm = Ym(new)*Xnorm + Dxy
;
;		Ym(new)*Xnorm + Dy*Xnorm = Ym(new)*Xnorm + Dxy
;		
;		Dy*Xnorm = Dxy			
;
;		Dy = Dxy * 1/Xnorm			(c5)
;
;	Assume that 1/Xnorm is approximately equal to Ym(old):
;
;		Dy = Dxy * Ym(old) (approx)		(c6)
;
;		Substituting (c6) into (c4):
;
;		Ym(new) = Ym(old) - Dxy*Ym(old)		(c7)
;
;	Substituting for Dxy from (c3) into (c7):
;
;		Ym(new) = Ym(old) - (Ym(old)*Xnorm - 1)*Ym(old)
;
;		Ym(new) = Ym(old) - Ym(old)^2*Xnorm + Ym(old)
;
;	     	Ym(new) = 2*Ym(old) - Ym(old)^2*Xnorm 	(c8) 
;
;	If after each calculation we equate Ym(old) to Ym(new):
;
;		Ym(old) = Ym(new) = Ym
;		
;	Then equation (c8) evaluates to:
;
;	     +--------------------------+
;	     |	Ym = 2*Ym - Ym^2*Xnorm 	| 		(c9) 
;	     +--------------------------+
;
;	If we start with an initial estimate of Ym, then equation
;	(c9) will converge to a solution very rapidly (typically
;	3 iterations for 16-bit resolution).
;
;	The initial estimate can either be obtained from a look up 
;	table, or from choosing a mid-point, or simply from linear 
;	interpolation. The method chosen for this problem is the
;	latter. This is simply accomplished by taking the complement
;	of the least significant bits of the Xnorm value.
;
;----------------------------------------------------------------
; Benchmarks
;
;  cycles	:	4 + nx * 54	 	(core)
;			24			(c-overhead)
;			core + c-overhead	(total)
;  code size	:       77 words program space
;                       15 words data space
;
;
;----------------------------------------------------------------
;  Revision History:
;  0.00, Alex Tessarolo, Original 
;  1.00, Jeff Axelrod, vectorized and ported to TMS320C54x
;****************************************************************

	.mmregs

	.ref InvYeTable

	.def	_recip16
_recip16:

;----------------------------------------------------------------
; Make adjustment to stack offset for far_mode
;----------------------------------------------------------------

	.if __far_mode
	.asg 2, OFFSET
	.else
	.asg 1, OFFSET
	.endif

;----------------------------------------------------------------
; Set register size of reister save area on stack
;----------------------------------------------------------------

	.asg	3, REG_SAVE_SZ

;----------------------------------------------------------------
; Set offsets to local function variables defined on stack
;----------------------------------------------------------------

	.asg	0, SP_INVYETABLE
	.asg	1, SP_XNORM
	.asg	2, SP_TEMP

	.asg	3, FRAME_SZ

;----------------------------------------------------------------
; Set offsets to function arguments passed on stack
;----------------------------------------------------------------

	.asg	OFFSET + REG_SAVE_SZ + FRAME_SZ, ARG_OFFSET

	.asg	0 + ARG_OFFSET, ARG_Z
	.asg	1 + ARG_OFFSET, ARG_ZEXP
	.asg	2 + ARG_OFFSET, ARG_N

;----------------------------------------------------------------
; Assign registers to local variables
;----------------------------------------------------------------

	.asg	ar0, AR_X
	.asg	ar1, AR_Z
	.asg	brc, AR_N
	.asg	ar3, AR_ZEXP
	.asg	ar4, AR_TABLE

;-----------------------------------------------------------------
; Save registers and establish local frame
;-----------------------------------------------------------------

	pshm	ar1				; 1 cycle
        PSHM    ST0                                 ; 1 cycle
        PSHM    ST1                                 ; 1 cycle
        RSBX    OVA                                 ; 1 cycle
        RSBX    OVB                                 ; 1 cycle

	frame	#-FRAME_SZ			; 1 cycle

;-----------------------------------------------------------------
; Process command-line arguments
;-----------------------------------------------------------------

	stl 	a,*(AR_X)			; 1 cycle
	mvdk 	*sp(ARG_Z),AR_Z			; 2 cycles
	mvdk 	*sp(ARG_ZEXP),AR_ZEXP		; 2 cycles



	ld 	*sp(ARG_N),a			; 1 cycle
	sub	 #1,a				; 2 cycles
	stlm 	a,AR_N				; 1 cycle

;----------------------------------------------------------------- 
; Initialize constants
;-----------------------------------------------------------------

	st #InvYeTable,*sp(SP_INVYETABLE)

	ssbx 	SXM			; 1 cycle, MUST set sign-extension mode.
	ssbx 	OVM			; 1 cycle, MUST turn overflow mode on.
		

LOOP:
	rptbd	DONE-1			; 2 cycles
	ld 	*AR_X+,16,a 		; 1 cycle,  delay - slot Normalize the input value X.
	exp	a			; 1 cycle,  delay - slot

loop_start:

	nop				; 1 cycle
	nop				; 1 cycle
	norm	a			; 1 cycle
	
	st 	t,*sp(SP_TEMP)		; 1 cycle
	ld 	#InvYeTable,b		; 2 cycles
	add 	*sp(SP_TEMP),b		; 1 cycle
	stl 	b,*(AR_TABLE)		; 1 cycle
	sth 	a,*sp(SP_XNORM) 	; 1 cycle, AR2 points to appropriate Ye value in table.
	sfta	a,-1			; 1 cycle, Estimate the first Ym value.
	xor	#01FFFh,16,a		; 2 cycles		
	sth	a,*AR_Z			; 1 cycle

;-----------------------------------------------------------------
; First two iterations:
;-----------------------------------------------------------------

	.loop 2
	ld 	*AR_Z,15,a 		; 2 cycles, Calculate Ym = 2*Ym - Ym^2*X
	ld 	*AR_Z,t			; 1 cycle
	mpy	*sp(SP_XNORM),b		; 1 cycle
	sth	b,1,*AR_Z		; 2 cycles
	mpy	*AR_Z,b			; 1 cycle
	sub	b,1,a			; 1 cycle
	sth	a,2,*AR_Z		; 2 cycles
	.endloop
;-----------------------------------------------------------------
; Final iteration: - this code is same as above loop, except 
; last instruction omitted
;-----------------------------------------------------------------

	ld 	*AR_Z,15,a 		; 2 cycles, Calculate Ym = 2*Ym - Ym^2*X
	ld 	*AR_Z,t			; 1 cycle
	mpy 	*sp(SP_XNORM),b		; 1 cycle
	sth 	b,1,*AR_Z		; 2 cycles
	mpy 	*AR_Z,b			; 1 cycle
	sub 	b,1,a			; 1 cycle

	st	#07000h,*AR_Z		; 2 cycles, Make sure that 8000h <= Ym < 7FFFh
	add	*AR_Z,16,a		; 1 cycle
	sub	*AR_Z,16,a		; 1 cycle
	sub	*AR_Z,16,a		; 1 cycle
	add	*AR_Z,16,a		; 1 cycle
	sth	a,3,*AR_Z+		; 2 cycles

	ld 	*AR_TABLE,a  		; 1 cycle, Read exponent value from table.
	stl 	a,*AR_ZEXP+		; 1 cycle
	
	ld 	*AR_X+,16,a 		; 1 cycle,  get next input value X.
	exp	a			; 1 cycle

DONE:
	frame 	#FRAME_SZ		; 1 cycle
        POPM    ST1
        POPM    ST0
	popm	ar1			; 1 cycle

	.if __far_mode
	fretd				; 4 cycles
	.else
	retd				; 3 cycles
	.endif
  	nop
	nop

	.data
InvYeTable:
	.word	0002h		; Ye = 2^1
	.word	0004h		; Ye = 2^2
	.word	0008h		; Ye = 2^3
	.word	0010h		; Ye = 2^4
	.word	0020h		; Ye = 2^5
	.word	0040h		; Ye = 2^6
	.word	0080h		; Ye = 2^7
	.word	0100h		; Ye = 2^8
	.word	0200h		; Ye = 2^9
	.word	0400h		; Ye = 2^10
	.word	0800h		; Ye = 2^11
	.word	1000h		; Ye = 2^12
	.word	2000h		; Ye = 2^13
	.word	4000h		; Ye = 2^14
	.word	8000h		; Ye = 2^15

;end of file. please do not remove. it is left here to ensure that no lines of code are removed by any editor
recip16.asm - 源码说明

本页面展示了「CCS3.3自带的TI 5400系列DSP的dsplib文件。文档说明可以在TI公司网站上下载。」中的 recip16.asm 源码文件，采用汇编编程语言编写，共 346 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与dsplib相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?