📄 fpu087.asm
字号:
mov di, dx
mul WORD PTR InvPiFg33
mov bx, dx
mov ax, di
mul WORD PTR InvPiFg33
add bx, ax
adc cx, dx
mov ax, si
mul WORD PTR InvPiFg33 + 2
add bx, ax
adc cx, dx
mov ax, di
mul WORD PTR InvPiFg33 + 2
add ax, cx
adc dx, 0
and dx, 3 ;Remove multiples of 2 pi
shl dx, 1 ;move over to make space for octant number
;
;CJLT - new code to reduce angle to: 0 <= angle <= 45
;
or ax, ax
jns Lessthan45
inc dx ;octant number
not ax ;angle=90-angle if it was >45 degrees
Lessthan45:
add a, dx ;remember octant and Sign in a
mov Num, ax ;ax=Term, now
;
; We do the Taylor series with all numbers scaled by pi/2
; so InvPiFg33 represents one. Truly.
;
mov Factorial, WORD PTR InvPiFg33 + 2
mov one, Factorial
mov Cos, Factorial ; Cos = 1
mov Sin, Num ; Sin = Num
LoopIntSinCos:
TaylorTerm ; ax=Term = Num * (Num/2) * (Num/3) * . . .
sub Cos, Term ; Cos = 1 - Num*(Num/2) + (Num**4)/4! - . . .
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos
TaylorTerm
sub Sin, Term ; Sin = Num - Num*(Num/2)*(Num/3) + (Num**5)/5! - . . .
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos
TaylorTerm
add Cos, Term
cmp Term, WORD PTR TrigLimit
jbe SHORT ExitIntSinCos
TaylorTerm ; Term = Num * (x/2) * (x/3) * . . .
add Sin, Term
cmp Term, WORD PTR TrigLimit
jnbe LoopIntSinCos
ExitIntSinCos:
xor ax, ax
mov cx, ax
cmp Cos, WORD PTR InvPiFg33 + 2
jb CosDivide ; Cos < 1.0
inc cx ; Cos == 1.0
jmp StoreCos
CosDivide:
mov dx, Cos
div WORD PTR InvPiFg33 + 2
StoreCos:
mov Cos, ax ; cx:Cos
xor ax, ax
mov bx, ax
cmp Sin, WORD PTR InvPiFg33 + 2
jb SinDivide ; Sin < 1.0
inc bx ; Sin == 1.0
jmp StoreSin
SinDivide:
mov dx, Sin
div WORD PTR InvPiFg33 + 2
StoreSin:
mov Sin, ax ; bx:Sin
; CJLT again. New tests are needed to correct signs and exchange sin/cos values
mov ax, a
inc al ;forces bit 1 to xor of previous bits 0 and 1
test al, 2
jz ChkNegCos
xchg cx, bx
xchg Sin, Cos
; mov ax, SinNeg commented out by CJLT. This was a bug.
; xchg ax, CosNeg
; mov CosNeg, ax and this was meant to be mov SinNeg,ax
ChkNegCos:
inc al ;forces bit 2 to xor of original bits 1 and 2
test al, 4
jz ChkNegSin
not Cos ;negate cos if quadrant 2 or 3
not cx
add Cos, 1
adc cx, 0
ChkNegSin:
inc al
inc al ;forces bit 3 to xor of original bits 2 and 3
test al, 8
jz CorrectQuad
not Sin
not bx
add Sin, 1
adc bx, 0
CorrectQuad:
CosPolarized:
mov dx, bx
mov bx, CosAddr
mov WORD PTR [bx], Cos
mov WORD PTR [bx+2], cx
SinPolarized:
mov bx, SinAddr
mov WORD PTR [bx], Sin
mov WORD PTR [bx+2], dx
ret
SinCos086 ENDP
PUBLIC ArcTan086
;
;Used to calculate logs of complex numbers, since log(R,theta)=log(R)+i.theta
; in polar coordinates.
;
;In C we need the prototype:
;long ArcTan086(long, long)
;The parameters are x and y, the returned value arctan(y/x) in the range 0..2pi.
ArcTan086 PROC uses si di, x1:word, x2:word, y1:word, y2:word
xor ax, ax ;ax=0
mov si, x1 ;Lo
mov bx, x2 ;Hi
or bx, bx ;Sign set ?
jns xplus
inc ax
not si
not bx
add bx, 1
adc si, 0 ;We have abs(x) now
xplus:
mov di, y1 ;Lo
mov cx, y2 ;Hi
or cx, cx ;Sign set?
jns yplus
inc ax
inc ax ;Set bit 1 of ax
shl ax, 1 ; and shift it all left
not di
not cx
add di, 1
adc cx, 0 ;We have abs(y) now
yplus:
cmp bx, cx ;y>x?
jl no_xchg
jg xchg_xy
cmp si, di ;Hi words zero. What about lo words?
jle no_xchg
xchg_xy: ;Exchange them
inc ax ;Flag the exchange
xchg si, di
xchg bx, cx
no_xchg:
mov SinNeg, ax ;Remember signs of x and y, and whether exchanged
or cx, cx ;y<x now, but is y>=1.0 ?
jz ynorm ;no, so continue
normy: ;yes, normalise by reducing y to 16 bits max.
rcr cx, 1 ; (We don't really lose any precision)
rcr di, 1
clc
rcr bx, 1
rcr si, 1
or cx, cx
jnz normy
ynorm:
ret
ArcTan086 ENDP
;
;There now follows Chris Lusby Taylor's novel (such modesty!) method
;of calculating exp(x). Originally, I was going to do it by decomposing x
;into a sum of terms of the form:
;
; th -i
; i term=ln(1+2 )
; -i
;If x>term[i] we subtract term[i] from x and multiply the answer by 1 + 2
;
;We can multiply by that factor by shifting and adding. Neat eh!
;
;Unfortunately, however, for 16 bit accuracy, that method turns out to be
;slower than what follows, which is also novel. Here, I first divide x not
;by ln(2) but ln(2)/16 so that we can look up an initial answer in a table of
;2^(n/16). This leaves the remainder so small that the polynomial Taylor series
;converges in just 1+x+x^2/2 which is calculated as 1+x(1+x/2).
;
_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
or dx, dx
jns CalcExpLT
not ax ; if negative, calc exp(abs(x))
not dx
add ax, 1
adc dx, 0
CalcExpLT:
shl ax, 1
rcl dx, 1
shl ax, 1
rcl dx, 1
shl ax, 1
rcl dx, 1 ;x is now in fg19 form for accuracy
; so, relative to fg16, we have:
div Ln2Fg15 ; 8x==(a * Ln(2)/2) + Rem
; x=(a * Ln(2)/16) + Rem/8
;so exp(x)=exp((a * Ln(2)/16) + Rem/8)
; =exp(a/16 * Ln(2)) * exp(Rem/8)
; =2^(a/16) * exp(Rem)
;a mod 16 will give us an initial estimate
mov cl, 4
mov di, ax ;Remember original
shr ax, cl
mov a, ax ;a/16 will give us a bit shift
mov ax, di
and ax, 0000fh ;a mod 16
shl ax, 1 ;used as an index into 2 byte per element Table
mov di, ax
dec cx ;3, please
add dx, 4 ;to control rounding up/down
shr dx, cl ;Num=Rem/8 (convert back to fg16)
;
mov ax, dx
shr ax, 1 ;Num/2 fg 16
inc ax ;rounding control
stc
rcr ax, 1 ;1+Num/2 fg15
mul dx ;dx:ax=Num(1+Num/2) fg31, so dx alone is fg15
shl ax, 1 ;more rounding control
adc dx, 8000H ;dx=1+Num(1+Num/2) fg15
mov ax, Table[di] ;Get table entry fg15
mul dx ;Only two multiplys used! fg14, now (15+15-16)
shl ax, 1
rcl dx, 1 ;fg15
mov e, dx
ret ; return e=e**x * (2**15), 1 < e**x < 2
_e2xLT ENDP ; a= bitshift needed
Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
mov ax, LoNum
mov dx, HiNum
mov TrigOverflow, 0
call _e2xLT ;Use Chris Lusby Taylor's e2x
cmp a, 15 ;CJLT - was 16
jae Overflow
; cmp expSign, 0
; jnz NegNumber
cmp HiNum, 0 ;CJLT - we don't really need expSign
jl NegNumber
mov ax, e
mov dx, ax
inc a
mov cx, 16
sub cx, a
shr dx, cl
mov cx, a
shl ax, cl
jmp ExitExp086
Overflow:
xor ax, ax
xor dx, dx
mov TrigOverflow, 1
jmp ExitExp086
NegNumber:
cmp e, 8000h
jne DivideE
mov ax, e
dec a
jmp ShiftE
DivideE:
xor ax, ax
mov dx, ax
stc
rcr dx, 1
div e
ShiftE:
xor dx, dx
mov cx, a
shr ax, cl
ExitExp086:
ret
Exp086 ENDP
SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
CoshAddr:WORD
mov ax, LoNum
mov dx, HiNum
call _e2xLT ;calculate exp(|x|) fg 15
;answer is e*2^a
cmp e, 8000h ;e==1 ?
jne InvertE ; e > 1, so we can invert it.
mov dx, 1
xor ax, ax
cmp a, 0
jne Shiftone
mov e, ax
mov cx, ax
jmp ChkSinhSign
Shiftone:
mov cx, a
shl dx, cl
dec cx
shr e, cl
shr dx, 1
shr e, 1
mov cx, dx
sub ax, e
sbb dx, 0
xchg ax, e
xchg dx, cx
jmp ChkSinhSign
InvertE:
xor ax, ax ; calc 1/e
mov dx, 8000h
div e
mov Inve, ax
ShiftE:
mov cx, a
shr Inve, cl
inc cl
mov dx, e
shl e, cl
neg cl
add cl, 16
shr dx, cl
mov cx, dx ; cx:e == e**Exp
mov ax, e ; dx:e == e**Exp
add ax, Inve
adc dx, 0
shr dx, 1
rcr ax, 1 ; cosh(Num) = (e**Exp + 1/e**Exp) / 2
sub e, Inve
sbb cx, 0
sar cx, 1
rcr e, 1
ChkSinhSign:
or HiNum, 0
jns StoreHyperbolics
not e
not cx
add e, 1
adc cx, 0
StoreHyperbolics:
mov bx, CoshAddr
mov WORD PTR [bx], ax
mov WORD PTR [bx+2], dx
mov bx, SinhAddr
mov WORD PTR [bx], e
mov WORD PTR [bx+2], cx
ret
SinhCosh086 ENDP
Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD
LOCAL Exp:WORD, Accum:WORD, LoAns:WORD, HiAns:WORD
;NOTE: CJLT's method does not need LoAns, HiAns, but he hasn't yet
;taken them out
xor bx, bx
mov cx, Fudge
mov ax, LoNum
mov dx, HiNum
mov TrigOverflow, 0
or dx, dx
js Overflow
jnz ShiftUp
or ax, ax
jnz ShiftUp
Overflow:
mov TrigOverflow, 1
jmp ExitLog086
ShiftUp:
inc cx ; cx = Exp
shl ax, 1
rcl dx, 1
or dx, dx
jns ShiftUp ;shift until dx in fg15 form
neg cx
add cx, 31
mov Exp, cx
;CJLT method starts here. We try to reduce x to make it very close to 1
;store LoAns in bx for now (fg 16; initially 0)
mov cl,2 ;shift count
redoterm2:
cmp dx, 0AAABH ;x > 4/3 ?
jb doterm3
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/4)
add bx, 49a5H ;ln(4/3) fg 15
jmp redoterm2
doterm3:
inc cl ;count=3
redoterm3:
cmp dx, 9249H ;x > 8/7 ?
jb doterm4
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/8)
add bx, 222eH ;ln(8/7)
jmp redoterm3
doterm4:
inc cl ;count=4
cmp dx, 8889H ;x > 16/15 ?
jb skipterm4
mov ax, dx
shr ax, cl
sub dx, ax ;x:=x(1-1/16)
add bx, 1085h ;ln(16/15)
;No need to retry term4 as error is acceptably low if we ignore it
skipterm4:
;Now we have reduced x to the range 1 <=x <1.072
;
;Now we continue with the conventional series, but x is so small we
;can ignore all terms except the first!
;i.e.:
;ln(x)=2(x-1)/(x+1)
sub dx, 8000h ; dx= x-1, fg 15
mov cx, dx
stc
rcr cx, 1 ; cx = 1 + (x-1)/2 fg 15
; = 1+x fg 14
mov ax,4000H ;rounding control (Trust me)
div cx ;ax=ln(x)
add bx, ax ;so add it to the rest of the Ans. No carry
MultExp:
mov cx, Exp
mov ax, Ln2Fg16
or cx, cx
js SubFromAns
mul cx ; cx = Exp * Lg2Fg16, fg 16
add ax, bx ;add bx part of answer
adc dx, 0
jmp ExitLog086
SubFromAns:
inc bx ;Somewhat artificial, but we need to add 1 somewhere
neg cx
mul cx
not ax
not dx
add ax, bx
adc dx, 0
ExitLog086:
ret
Log086 ENDP
END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -