📄 fpu087.asm
字号:
TITLE fpu087.asm (C) 1989, Mark C. Peterson, CompuServe [70441,3353]
SUBTTL All rights reserved.
;
; Code may be used in any program provided the author is credited
; either during program execution or in the documentation. Source
; code may be distributed only in combination with public domain or
; shareware source code. Source code may be modified provided the
; copyright notice and this message is left unchanged and all
; modifications are clearly documented.
;
; I would appreciate a copy of any work which incorporates this code.
;
; Mark C. Peterson
; 405-C Queen St., Suite #181
; Southington, CT 06489
; (203) 276-9721
;
; References:
; The VNR Concise Encyclopedia of Mathematics
; by W. Gellert, H. Hustner, M. Hellwich, and H. Kastner
; Published by Van Nostrand Reinhold Comp, 1975
;
; 80386/80286 Assembly Language Programming
; by William H. Murray, III and Chris H. Pappas
; Published by Osborne McGraw-Hill, 1986
;
; History since Fractint 16.3:
; CJLT changed e2x and Log086 algorithms for more speed
; CJLT corrected SinCos086 for -ve angles in 2nd and 4th quadrants
; CJLT speeded up SinCos086 for angles >45 degrees in any quadrant
; (See comments containing the string `CJLT')
; 14 Aug 91 CJLT removed r16Mul - not called from anywhere
; 21 Aug 91 CJLT corrected Table[1] from 6 to b
; improved bx factors in Log086 for more accuracy
; corrected Exp086 overflow detection to 15 from 16 bits.
; 07 Sep 92 MP corrected problem in FPUcplxlog
; 07 Sep 92 MP added argument test for FPUcplxdiv
; 06 Nov 92 CAE made some varibles public for PARSERA.ASM
; 07 Dec 92 CAE sped up FPUsinhcosh function
;
; CJLT= Chris J Lusby Taylor
; 32 Turnpike Road
; Newbury, England (where's that?)
; Contactable via Compuserve user Stan Chelchowski [100016,351]
; or Tel 011 44 635 33270 (home)
;
; CAE= Chuck Ebbert CompuServe [76306,1226]
;
;
;PROCs in this module:
;FPUcplxmul PROC x:word, y:word, z:word
;FPUcplxdiv PROC x:word, y:word, z:word
;FPUcplxlog PROC x:word, z:word
;FPUsinhcosh PROC x:word, sinh:word, cosh:word
;FPUsincos PROC x:word, sinx:word, cosx:word
;r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
;RegFloat2Fg PROC x1:word, x2:word, Fudge:word
;RegFg2Float PROC x1:word, x2:word, FudgeFact:byte
;ExpFudged PROC uses si, x_low:word, x_high:word, Fudge:word
;LogFudged PROC uses si di, x_low:word, x_high:word, Fudge:word
;LogFloat14 PROC x1:word, x2:word
;RegSftFloat PROC x1:word, x2:word, Shift:byte
;RegDivFloat PROC uses si di, x1:word, x2:word, y1:word, y2:word
;SinCos086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinAddr:WORD, \
;_e2xLT PROC ;argument in dx.ax (bitshift=16 is hard coded here)
;Exp086 PROC uses si di, LoNum:WORD, HiNum:WORD
;SinhCosh086 PROC uses si di, LoNum:WORD, HiNum:WORD, SinhAddr:WORD, \
;Log086 PROC uses si di, LoNum:WORD, HiNum:WORD, Fudge:WORD
IFDEF ??version
MASM51
QUIRKS
ENDIF
.model medium, c
extrn cos:far
extrn _Loaded387sincos:far
extrn compiled_by_turboc:word
.data
extrn cpu:WORD
PUBLIC TrigLimit, TrigOverflow
; CAE 6Nov92 made these public for PARSERA.ASM */
PUBLIC _1_, _2_, PointFive, infinity
PiFg13 dw 6487h
InvPiFg33 dd 0a2f9836eh
InvPiFg16 dw 517ch
Ln2Fg16 dw 0b172h ;ln(2) * 2^16 . True value is b172.18
Ln2Fg15 dw 058b9h ;used by e2xLT. True value is 58b9.0C
TrigOverflow dw 0
TrigLimit dd 0
;
;Table of 2^(n/16) for n=0 to 15. All entries fg15.
;Used by e2xLT
;
Table dw 08000h,085abh,08b96h,091c4h
dw 09838h,09ef5h,0a5ffh,0ad58h
dw 0b505h,0bd09h,0c567h,0ce25h
dw 0d745h,0e0cdh,0eac1h,0f525h
one dw ?
expSign dw ?
a dw ?
SinNeg dw ? ;CJLT - not now needed by SinCos086, but by
CosNeg dw ? ; ArcTan086
Ans dq ?
fake_es dw ? ; <BDT> Windows can't use ES for storage
TaylorTerm MACRO
LOCAL Ratio
add Factorial, one
jnc SHORT Ratio
rcr Factorial, 1
shr Num, 1
shr one, 1
Ratio:
mul Num
div Factorial
ENDM
_4_ dq 4.0
_2_ dq 2.0
_1_ dq 1.0
PointFive dq 0.5
temp dq ?
Sign dw ?
extrn fpu:word
.code
FPUcplxmul PROC x:word, y:word, z:word
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
mov bx, z
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
fsubr ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
fmulp st(3), st ; y.x, x.y, x.x*y.y
fmul ; y.x*x.y, x.x*y.y
fadd ; y.x*x.y + x.x*y.y
fstp QWORD PTR [bx+8]
ret
FPUcplxmul ENDP
.DATA
infinity dq 1.0E+300
PUBLIC DivideOverflow
DivideOverflow dw 0
.CODE
FPUcplxdiv PROC x:word, y:word, z:word
LOCAL Status:WORD
mov bx, x
fld QWORD PTR [bx] ; x.x
fld QWORD PTR [bx+8] ; x.y, x.x
mov bx, y
fld QWORD PTR [bx] ; y.x, x.y, x.x
fld QWORD PTR [bx+8] ; y.y, y.x, x.y, x.x
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.y*y.y, y.y, y.x, x.y, x.x
fld st(2) ; y.x, y.y*y.y, y.y, y.x, x.y, x.x
fmul st, st ; y.x*y.x, y.y*y.y, y.y, y.x, x.y, x.x
fadd ; mod, y.y, y.x, x.y, x.x
ftst ; test whether mod is (0,0)
fstsw Status
mov ax, Status
and ah, 01000101b
cmp ah, 01000000b
jne NotZero
fstp st
fstp st
fstp st
fstp st
fstp st
fld infinity
fld st
mov bx, z
fstp QWORD PTR [bx]
fstp QWORD PTR [bx+8]
inc DivideOverflow
jmp ExitDiv
NotZero:
fdiv st(1), st ; mod, y.y=y.y/mod, y.x, x.y, x.x
fdivp st(2), st ; y.y, y.x=y.x/mod, x.y, x.x
mov bx, z
fld st ; y.y, y.y, y.x, x.y, x.x
fmul st, st(3) ; y.y*x.y, y.y. y.x, x.y, x.x
fld st(2) ; y.x, y.y*x.y, y.y, y.x, x.y, x.x
fmul st, st(5) ; y.x*x.x, y.y*x.y, y.y, y.x, x.y, x.x
fadd ; y.x*x.x - y.y*x.y, y.y, y.x, x.y, x.x
fstp QWORD PTR [bx] ; y.y, y.x, x.y, x.x
fmulp st(3), st ; y.x, x.y, x.x*y.y
fmul ; y.x*x.y, x.x*y.y
fsubr ; y.x*x.y + x.x*y.y
fstp QWORD PTR [bx+8]
ExitDiv:
ret
FPUcplxdiv ENDP
FPUcplxlog PROC x:word, z:word
LOCAL Status:word, ImagZero:WORD
mov bx, x
mov ax, WORD PTR [bx+8+6]
mov ImagZero, ax
or ax, WORD PTR [bx+6]
jnz NotBothZero
fldz
fldz
jmp StoreZX
NotBothZero:
fld QWORD PTR [bx+8] ; x.y
fld QWORD PTR [bx] ; x.x, x.y
mov bx, z
fldln2 ; ln2, x.x, x.y
fdiv _2_ ; ln2/2, x.x, x.y
fld st(2) ; x.y, ln2/2, x.x, x.y
fmul st, st ; sqr(x.y), ln2/2, x.x, x.y
fld st(2) ; x.x, sqr(x.y), ln2/2, x.x, x.y
fmul st, st ; sqr(x.x), sqr(x.y), ln2/2, x.x, x.y
fadd ; mod, ln2/2, x.x, x.y
fyl2x ; z.x, x.x, x.y
fxch st(2) ; x.y, x.x, z.x
fxch ; x.x, x.y, z.x
cmp fpu, 387
jne Restricted
fpatan ; z.y, z.x
jmp StoreZX
Restricted:
mov bx, x
mov dh, BYTE PTR [bx+7]
or dh, dh
jns ChkYSign
fchs ; |x.x|, x.y, z.x
ChkYSign:
mov dl, BYTE PTR [bx+8+7]
or dl, dl
jns ChkMagnitudes
fxch ; x.y, |x.x|, z.x
fchs ; |x.y|, |x.x|, z.x
fxch ; |x.x|, |x.y|, z.x
ChkMagnitudes:
fcom st(1) ; x.x, x.y, z.x
fstsw Status ; x.x, x.y, z.x
test Status, 4500h
jz XisGTY
test Status, 4000h
jz XneY
fstp st ; x.y, z.x
fstp st ; z.x
fldpi ; Pi, z.x
fdiv _4_ ; Pi/4, z.x
jmp ChkSignZ
XneY:
fxch ; x.y, x.x, z.x
fpatan ; Pi/2 - Angle, z.x
fldpi ; Pi, Pi/2 - Angle, z.x
fdiv _2_ ; Pi/2, Pi/2 - Angle, z.x
fsubr ; Angle, z.x
jmp ChkSignZ
XisGTY:
fpatan ; Pi-Angle or Angle+Pi, z.x
ChkSignZ:
or dh, dh
js NegX
or dl, dl
jns StoreZX
fchs
jmp StoreZX
NegX:
or dl, dl
js QuadIII
fldpi ; Pi, Pi-Angle, z.x
fsubr ; Angle, z.x
jmp StoreZX
QuadIII:
fldpi ; Pi, Angle+Pi, z.x
fsub ; Angle, z.x
StoreZX:
mov bx, z
fstp QWORD PTR [bx+8] ; z.x
fstp QWORD PTR [bx] ; <empty>
ret
FPUcplxlog ENDP
FPUsinhcosh PROC x:word, sinh:word, cosh:word
LOCAL Control:word
fstcw Control
push Control ; Save control word on the stack
or Control, 0000110000000000b
fldcw Control ; Set control to round towards zero
mov Sign, 0 ; Assume the sign is positive
mov bx, x
fldln2 ; ln(2)
fdivr QWORD PTR [bx] ; x/ln(2)
cmp BYTE PTR [bx+7], 0
jns DuplicateX
fchs ; x = |x|
DuplicateX:
fld st ; x/ln(2), x/ln(2)
frndint ; int = integer(|x|/ln(2)), x/ln(2)
fxch ; x/ln(2), int
fsub st, st(1) ; rem < 1.0, int
fmul PointFive ; rem/2 < 0.5, int
; CAE 7Dec92 changed above from divide by 2 to multiply by .5
f2xm1 ; (2**rem/2)-1, int
fadd _1_ ; 2**rem/2, int
fmul st, st ; 2**rem, int
fscale ; e**|x|, int
fstp st(1) ; e**|x|
cmp BYTE PTR [bx+7], 0
jns ExitFexp
fdivr _1_ ; e**x
ExitFexp:
fld st ; e**x, e**x
fdivr PointFive ; e**-x/2, e**x
fld st ; e**-x/2, e**-x/2, e**x
fxch st(2) ; e**x, e**-x/2, e**-x/2
fmul PointFive ; e**x/2, e**-x/2, e**-x/2
; CAE 7Dec92 changed above from divide by 2 to multiply by .5
fadd st(2), st ; e**x/2, e**-x/2, cosh(x)
fsubr ; sinh(x), cosh(x)
mov bx, sinh ; sinh, cosh
fstp QWORD PTR [bx] ; cosh
mov bx, cosh
fstp QWORD PTR [bx] ; <empty>
pop Control
fldcw Control ; Restore control word
ret
FPUsinhcosh ENDP
FPUsincos PROC x:word, sinx:word, cosx:word
LOCAL Status:word
mov bx, x
fld QWORD PTR [bx] ; x
cmp fpu, 387
jne Use387FPUsincos
call _Loaded387sincos ; cos(x), sin(x)
mov bx, cosx
fstp QWORD PTR [bx] ; sin(x)
mov bx, sinx
fstp QWORD PTR [bx] ; <empty>
ret
Use387FPUsincos:
sub sp, 8 ; save 'x' on the CPU stack
mov bx, sp
fstp QWORD PTR [bx] ; FPU stack: <empty>
call cos
add sp, 8 ; take 'cos(x)' off the CPU stack
mov bx, ax
cmp compiled_by_turboc,0
jne turbo_c1
fld QWORD PTR [bx] ; FPU stack: cos(x)
turbo_c1:
fld st ; FPU stack: cos(x), cos(x)
fmul st, st ; cos(x)**2, cos(x)
fsubr _1_ ; sin(x)**2, cos(x)
fsqrt ; +/-sin(x), cos(x)
mov bx, x
fld QWORD PTR [bx] ; x, +/-sin(x), cos(x)
fldpi ; Pi, x, +/-sin(x), cos(x)
fadd st, st ; 2Pi, x, +/-sin(x), cos(x)
fxch ; |x|, 2Pi, +/-sin(x), cos(x)
fprem ; Angle, 2Pi, +/-sin(x), cos(x)
fstp st(1) ; Angle, +/-sin(x), cos(x)
fldpi ; Pi, Angle, +/-sin(x), cos(x)
cmp BYTE PTR [bx+7], 0
jns SignAlignedPi
fchs ; -Pi, Angle, +/-sin(x), cos(x)
SignAlignedPi:
fcompp ; +/-sin(x), cos(x)
fstsw Status ; +/-sin(x), cos(x)
mov ax, Status
and ah, 1
jz StoreSinCos ; Angle <= Pi
fchs ; sin(x), cos(x)
StoreSinCos:
mov bx, sinx
fstp QWORD PTR [bx] ; cos(x)
mov bx, cosx
fstp QWORD PTR [bx] ; <empty>
ret
FPUsincos ENDP
PUBLIC r16Mul
r16Mul PROC uses si di, x1:word, x2:word, y1:word, y2:word
mov si, x1
mov bx, x2
mov di, y1
mov cx, y2
xor ax, ax
shl bx, 1
jz Exitr16Mult ; Destination is zero
rcl ah, 1
shl cx, 1
jnz Chkr16Exp
xor bx, bx ; Source is zero
xor si, si
jmp Exitr16Mult
Chkr16Exp:
rcl al, 1
xor ah, al ; Resulting sign in ah
stc ; Put 'one' bit back into number
rcr bl, 1
stc
rcr cl, 1
sub ch, 127 ; Determine resulting exponent
add bh, ch
mov al, bh
mov fake_es, ax ; es has the resulting exponent and sign
mov ax, di
mov al, ah
mov ah, cl
mov dx, si
mov dl, dh
mov dh, bl
mul dx
mov cx, fake_es
shl ax, 1
rcl dx, 1
jnc Remr16MulOneBit ; 'One' bit is the next bit over
inc cl ; 'One' bit removed with previous shift
jmp Afterr16MulNorm
Remr16MulOneBit:
shl ax, 1
rcl dx, 1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -