📄 recip16.asm
字号:
;***********************************************************
; Version 2.20.01
;***********************************************************
;****************************************************************
; Filename: recip16.asm
; Description: calculates reciprocal of Q15 number
;----------------------------------------------------------------
; Synopsis:
;
; void recip16 (DATA *x, DATA *z, DATA *zexp, ushort n)
;
; x[n] : pointer to input/sampled data buffer
;
; z[n] : pointer to output data buffer
;
; zexp[n] : pointer to exponent buffer for output values
;
; n : number of vector elements
;
;----------------------------------------------------------------
; Description:
;
; This routine returns the fractional and exponential portion
; of the reciprocal of a Q15 number. Since the reciprocal is always
; greater than 1, it returns an exponent such that:
;
; z[i] * zexp[i] = reciprocal
;
;----------------------------------------------------------------
; Algorithm:
;
;
;
; The most optimal method for calculating the inverse of a
; fractional number (Y=1/X) is to normalize the number first.
; This limits the range of the number as follows:
;
; 0.5 <= Xnorm < 1
; -1 <= Xnorm <= -0.5 (1)
;
; The resulting equation becomes:
; Y = 1/(Xnorm*2^-n)
;
; or Y = 2^n/Xnorm (2)
;
; where n = 1,2,3,...,14,15
;
; Letting Ye = 2^n:
;
; Ye = 2^n (3)
;
; Substituting (3) into equation (2):
;
; Y = Ye * 1/Xnorm (4)
;
; Letting Ym = 1/Xnorm:
;
; Ym = 1/Xnorm (5)
;
; Substituting (5) into equation (4):
;
; Y = Ye * Ym (6)
;
; For the given range of Xnorm, the range of Ym is:
;
; 1 <= Ym < 2
; -2 <= Ym <= -1 (7)
;
; To calculate the value of Ym, various options are possible:
;
; (a) Taylor Series Expansion
; (b) 2nd,3rd,4th,.. Order Polynomial (Line Of Best Fit)
; (c) Successive Approximation
;
; The method chosen in this example is (c). Successive
; approximation yields the most optimum code versus speed
; versus accuracy option. The method outlined below yilds an
; accuracy of 15 bits.
;
; Assume Ym(new) = exact value of 1/Xnorm:
;
; Ym(new) = 1/Xnorm (c1)
;
; or Ym(new)*X = 1 (c2)
;
; Assume Ym(old) = estimate of value 1/X:
;
; Ym(old)*Xnorm = 1 + Dyx
;
; or Dxy = Ym(old)*Xnorm - 1 (c3)
;
; where Dyx = error in calculation
;
; Assume that Ym(new) and Ym(old) are related as follows:
;
; Ym(new) = Ym(old) - Dy (c4)
;
; where Dy = difference in values
;
; Substituting (c2) and (c4) into (c3):
;
; Ym(old)*Xnorm = Ym(new)*Xnorm + Dxy
;
; (Ym(new) + Dy)*Xnorm = Ym(new)*Xnorm + Dxy
;
; Ym(new)*Xnorm + Dy*Xnorm = Ym(new)*Xnorm + Dxy
;
; Dy*Xnorm = Dxy
;
; Dy = Dxy * 1/Xnorm (c5)
;
; Assume that 1/Xnorm is approximately equal to Ym(old):
;
; Dy = Dxy * Ym(old) (approx) (c6)
;
; Substituting (c6) into (c4):
;
; Ym(new) = Ym(old) - Dxy*Ym(old) (c7)
;
; Substituting for Dxy from (c3) into (c7):
;
; Ym(new) = Ym(old) - (Ym(old)*Xnorm - 1)*Ym(old)
;
; Ym(new) = Ym(old) - Ym(old)^2*Xnorm + Ym(old)
;
; Ym(new) = 2*Ym(old) - Ym(old)^2*Xnorm (c8)
;
; If after each calculation we equate Ym(old) to Ym(new):
;
; Ym(old) = Ym(new) = Ym
;
; Then equation (c8) evaluates to:
;
; +--------------------------+
; | Ym = 2*Ym - Ym^2*Xnorm | (c9)
; +--------------------------+
;
; If we start with an initial estimate of Ym, then equation
; (c9) will converge to a solution very rapidly (typically
; 3 iterations for 16-bit resolution).
;
; The initial estimate can either be obtained from a look up
; table, or from choosing a mid-point, or simply from linear
; interpolation. The method chosen for this problem is the
; latter. This is simply accomplished by taking the complement
; of the least significant bits of the Xnorm value.
;
;----------------------------------------------------------------
; Benchmarks
;
; cycles : 4 + nx * 54 (core)
; 24 (c-overhead)
; core + c-overhead (total)
; code size : 77 words program space
; 15 words data space
;
;
;----------------------------------------------------------------
; Revision History:
; 0.00, Alex Tessarolo, Original
; 1.00, Jeff Axelrod, vectorized and ported to TMS320C54x
;****************************************************************
.mmregs
.ref InvYeTable
.def _recip16
_recip16:
;----------------------------------------------------------------
; Make adjustment to stack offset for far_mode
;----------------------------------------------------------------
.if __far_mode
.asg 2, OFFSET
.else
.asg 1, OFFSET
.endif
;----------------------------------------------------------------
; Set register size of reister save area on stack
;----------------------------------------------------------------
.asg 3, REG_SAVE_SZ
;----------------------------------------------------------------
; Set offsets to local function variables defined on stack
;----------------------------------------------------------------
.asg 0, SP_INVYETABLE
.asg 1, SP_XNORM
.asg 2, SP_TEMP
.asg 3, FRAME_SZ
;----------------------------------------------------------------
; Set offsets to function arguments passed on stack
;----------------------------------------------------------------
.asg OFFSET + REG_SAVE_SZ + FRAME_SZ, ARG_OFFSET
.asg 0 + ARG_OFFSET, ARG_Z
.asg 1 + ARG_OFFSET, ARG_ZEXP
.asg 2 + ARG_OFFSET, ARG_N
;----------------------------------------------------------------
; Assign registers to local variables
;----------------------------------------------------------------
.asg ar0, AR_X
.asg ar1, AR_Z
.asg brc, AR_N
.asg ar3, AR_ZEXP
.asg ar4, AR_TABLE
;-----------------------------------------------------------------
; Save registers and establish local frame
;-----------------------------------------------------------------
pshm ar1 ; 1 cycle
PSHM ST0 ; 1 cycle
PSHM ST1 ; 1 cycle
RSBX OVA ; 1 cycle
RSBX OVB ; 1 cycle
frame #-FRAME_SZ ; 1 cycle
;-----------------------------------------------------------------
; Process command-line arguments
;-----------------------------------------------------------------
stl a,*(AR_X) ; 1 cycle
mvdk *sp(ARG_Z),AR_Z ; 2 cycles
mvdk *sp(ARG_ZEXP),AR_ZEXP ; 2 cycles
ld *sp(ARG_N),a ; 1 cycle
sub #1,a ; 2 cycles
stlm a,AR_N ; 1 cycle
;-----------------------------------------------------------------
; Initialize constants
;-----------------------------------------------------------------
st #InvYeTable,*sp(SP_INVYETABLE)
ssbx SXM ; 1 cycle, MUST set sign-extension mode.
ssbx OVM ; 1 cycle, MUST turn overflow mode on.
LOOP:
rptbd DONE-1 ; 2 cycles
ld *AR_X+,16,a ; 1 cycle, delay - slot Normalize the input value X.
exp a ; 1 cycle, delay - slot
loop_start:
nop ; 1 cycle
nop ; 1 cycle
norm a ; 1 cycle
st t,*sp(SP_TEMP) ; 1 cycle
ld #InvYeTable,b ; 2 cycles
add *sp(SP_TEMP),b ; 1 cycle
stl b,*(AR_TABLE) ; 1 cycle
sth a,*sp(SP_XNORM) ; 1 cycle, AR2 points to appropriate Ye value in table.
sfta a,-1 ; 1 cycle, Estimate the first Ym value.
xor #01FFFh,16,a ; 2 cycles
sth a,*AR_Z ; 1 cycle
;-----------------------------------------------------------------
; First two iterations:
;-----------------------------------------------------------------
.loop 2
ld *AR_Z,15,a ; 2 cycles, Calculate Ym = 2*Ym - Ym^2*X
ld *AR_Z,t ; 1 cycle
mpy *sp(SP_XNORM),b ; 1 cycle
sth b,1,*AR_Z ; 2 cycles
mpy *AR_Z,b ; 1 cycle
sub b,1,a ; 1 cycle
sth a,2,*AR_Z ; 2 cycles
.endloop
;-----------------------------------------------------------------
; Final iteration: - this code is same as above loop, except
; last instruction omitted
;-----------------------------------------------------------------
ld *AR_Z,15,a ; 2 cycles, Calculate Ym = 2*Ym - Ym^2*X
ld *AR_Z,t ; 1 cycle
mpy *sp(SP_XNORM),b ; 1 cycle
sth b,1,*AR_Z ; 2 cycles
mpy *AR_Z,b ; 1 cycle
sub b,1,a ; 1 cycle
st #07000h,*AR_Z ; 2 cycles, Make sure that 8000h <= Ym < 7FFFh
add *AR_Z,16,a ; 1 cycle
sub *AR_Z,16,a ; 1 cycle
sub *AR_Z,16,a ; 1 cycle
add *AR_Z,16,a ; 1 cycle
sth a,3,*AR_Z+ ; 2 cycles
ld *AR_TABLE,a ; 1 cycle, Read exponent value from table.
stl a,*AR_ZEXP+ ; 1 cycle
ld *AR_X+,16,a ; 1 cycle, get next input value X.
exp a ; 1 cycle
DONE:
frame #FRAME_SZ ; 1 cycle
POPM ST1
POPM ST0
popm ar1 ; 1 cycle
.if __far_mode
fretd ; 4 cycles
.else
retd ; 3 cycles
.endif
nop
nop
.data
InvYeTable:
.word 0002h ; Ye = 2^1
.word 0004h ; Ye = 2^2
.word 0008h ; Ye = 2^3
.word 0010h ; Ye = 2^4
.word 0020h ; Ye = 2^5
.word 0040h ; Ye = 2^6
.word 0080h ; Ye = 2^7
.word 0100h ; Ye = 2^8
.word 0200h ; Ye = 2^9
.word 0400h ; Ye = 2^10
.word 0800h ; Ye = 2^11
.word 1000h ; Ye = 2^12
.word 2000h ; Ye = 2^13
.word 4000h ; Ye = 2^14
.word 8000h ; Ye = 2^15
;end of file. please do not remove. it is left here to ensure that no lines of code are removed by any editor
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -