📄 calcmand.asm
字号:
; CALCMAND.ASM - Mandelbrot/Julia Set calculation Routines
; This module runs as part of an overlay with calcfrac.c.
; It must not be called from anywhere other than calcfrac.
; The routines in this code perform Mandelbrot and Julia set
; calculations using 32-bit integer math as opposed to the
; "traditional" floating-point approach.
; This code relies on several tricks to run as quickly as it does.
; One can fake floating point arithmetic by using integer
; arithmetic and keeping track of the implied decimal point
; if things are reasonable -- and in this case, they are.
; I replaced code that looked like: z = x*y with code that
; looks like:
; ix = x * ifudge (outside the loops)
; iy = y * ifudge
; ....
; iz = (ix * iy) / ifudge (inside the loops)
; (and keep remembering that all the integers are "ifudged" bigger)
; The 386 has native 32-bit integer arithmetic, and (briefly) keeps
; 64-bit values around after 32-bit multiplies. If the result is
; divided down right away, you've got 64-bit arithmetic. You just
; have to ensure that the result after the divide is <= 32 bits long.
; CPUs predating the 386 have to emulate 32-bit arithmetic using
; 16-bit arithmetic, which is significantly slower.
; Dividing is slow -- but shifting is fast, and we can select our
; "fudge factor" to be a power of two, permitting us to use that
; method instead. In addition, the 386 can perform 32-bit wide
; shifting -- and even 64-bit shifts with the following logic:
; shrd eax,edx,cl
; shr edx,cl
; so we make sure that our "fudge factor" is a power of 2 and shift
; it down that way.
; Calcmand is hardcoded for a fudge factor of 2**29.
; Bert Tyler
; History since Fractint 16.0
; (See comments with CJLT in them)
; CJLT=Chris Lusby Taylor who has...
;
; 1. Speeded up 16 bit on 16 bit CPU
; Minor changes, notably prescaling to fg14 before multiplying
; instead of scaling the answer.
; Also, I added overflow detection after adding linit, since it
; seems this could overflow.
; Overall effect is about 10% faster on 386 with debugflag=8088
; 2. Speeded up 32 bit on 16 bit CPU
; The macro `square' is totally rewritten, as is the logic for 2xy,
; by prescaling x and y to fg31, not fg29. This allows us to do a
; 32 bit multiply in 3, not 4, 16 bit chunks while retaining full
; fg29 accuracy.
; Also, I removed lots of well-meaning but ineffective code handling
; special cases of zeros and tidied up the handling of negative numbers,
; so the routine is quite a bit shorter now and overall throughput of
; Mandel is now over 40% faster on a 386 with debugflag=8088.
; By the way, I was tempted to go the whole hog and replace x*x-y*y
; by (x+y)*(x-y) to reduce 4 16-bit multiplies to 3, but it makes
; escape detection a bit trickier. Another time, maybe.
;
; Assembler set-up. Order matters here: the compatibility switches and the
; memory model must be established before any segment or symbol is declared.
;
; required for compatibility if Turbo ASM
; (??version is only defined when assembling with Turbo Assembler; the same
; test is used further down to steer TASM away from the IMUL EBX,EBX path)
IFDEF ??version
MASM51
QUIRKS
ENDIF
; medium memory model with C language conventions
.MODEL medium,c
; make the private _DATA2 segment declared below part of DGROUP with _DATA
DGROUP group _DATA,_DATA2
; start with the base 8086 instruction set; the 386-only code later in the
; file is enabled explicitly with a .386 directive
.8086
; these must NOT be in any segment!!
; this gets rid of TURBO-C fixup errors
; (all four are far routines, reached with "call far ptr")
extrn keypressed:far ; this routine is in 'general.asm' - polled for a pending key, result tested in ax
extrn getakey:far ; this routine is in 'general.asm' - called to read the key for real
extrn iplot_orbit:far ; this routine is in 'calcfrac.c'
extrn scrub_orbit:far ; this routine is in 'calcfrac.c'
; _DATA2 - dword-aligned data segment holding the working state of
; calcmandasm. All 32-bit values here are fixed-point integers scaled
; ("fudged") by 2**FUDGEFACTOR.
_DATA2 segment DWORD PUBLIC 'DATA'
FUDGEFACTOR equ 29 ; default (non-potential) fudgefactor: fixed-point scale is 2**29
; ************************ External variables *****************************
; (defined elsewhere, only referenced from this module)
extrn fractype:word ; == 0 if Mandelbrot set, else Julia (calcmandasm takes the Julia path when this is 1)
extrn inside:word ; "inside" color, normally 1 (blue)
extrn outside:word ; "outside" color, normally -1 (iter)
extrn creal:dword, cimag:dword ; Julia Set Constant (copied into x and y on entry)
extrn delmin:dword ; min increment - precision required (high word is compared with 8 to pick 16-bit math)
extrn maxit:word ; maximum iterations
extrn lm:dword ; magnitude bailout limit (compared against x*x + y*y)
extrn row:word, col:word ; current pixel to calc
extrn color:word ; color calculated for the pixel (set to -1 when a key aborts the calc)
extrn realcolor:word ; color before inside,etc adjustments
extrn reset_periodicity:word ; nonzero if to be reset (oldcolor is then reset to maxit-250)
extrn kbdcount:word ; keyboard counter (decremented per call; keyboard is polled when it goes negative)
extrn cpu:word ; cpu type: 86, 186, 286, or 386
extrn dotmode:word ; video mode selector; 11 is treated as disk video
extrn show_orbit:word ; "show-orbit" flag (toggled by the 'o'/'O' key)
extrn orbit_ptr:word ; "orbit pointer" flag (cleared to 0 for each pixel)
extrn periodicitycheck:word ; no periodicity if zero
public linitx,linity ; caller sets these
public savedmask ; caller sets this
; ************************ Internal variables *****************************
align 4 ; keep the dword variables below on 4-byte boundaries
x dd 0 ; temp value: x (real part of the current iterate)
y dd 0 ; temp value: y (imaginary part of the current iterate)
absx dd 0 ; temp value: abs(x)
linitx dd 0 ; initial value, set by calcfrac
linity dd 0 ; initial value, set by calcfrac
savedmask dd 0 ; saved values mask (bits of x/y that must match savedx/savedy)
savedx dd 0 ; saved values of X and Y iterations
savedy dd 0 ; (for periodicity checks)
k dw 0 ; iteration countdown counter (starts from maxit+1, counts down to 0)
oldcolor dw 0 ; prior pixel's escape time k value (periodicity checks run once oldcolor >= k)
savedand dw 0 ; AND value for periodicity checks (k AND savedand == 0 means save, else compare)
savedincr dw 0 ; flag for incrementing AND value (countdown until savedand is widened)
period db 0 ; periodicity, if in the lake (set to 1 when a repeat is detected)
_DATA2 ends
.CODE
; ***************** Function calcmandasm() **********************************
public calcmandasm
; FRAME <r1,r2,...>
; Procedure prologue. Saves BP, points BP at the current top of stack, then
; pushes every register named in the list, working left to right.
; Undo it with UNFRAME, handing that macro the same registers in reverse
; order (calcmandasm uses FRAME <di,si> ... UNFRAME <si,di>).
FRAME MACRO regs
        push    bp                      ;; keep the incoming frame pointer
        mov     bp,sp                   ;; bp now marks the base of this frame
    IRP savereg,<regs>
        push    savereg                 ;; one push per list entry
    ENDM
ENDM
; UNFRAME <rN,...,r1>
; Procedure epilogue matching FRAME. Pops every register named in the list,
; working left to right, then restores BP. The list has to be the reverse of
; the one given to FRAME so each register gets its own saved value back.
; No RET is emitted; the procedure issues that itself.
UNFRAME MACRO regs
    IRP savedreg,<regs>
        pop     savedreg                ;; one pop per list entry
    ENDM
        pop     bp                      ;; frame pointer back to its entry value
ENDM
calcmandasm proc
FRAME <di,si> ; std frame, for TC++ overlays
sub ax,ax ; clear ax
cmp periodicitycheck,ax ; periodicity checking disabled?
je initoldcolor ; yup, set oldcolor 0 to disable it
cmp reset_periodicity,ax ; periodicity reset?
je short initparms ; inherit oldcolor from prior invocation
mov ax,maxit ; yup. reset oldcolor to maxit-250
sub ax,250 ; (avoids slowness at high maxits)
initoldcolor:
mov oldcolor,ax ; reset oldcolor
initparms:
mov ax,word ptr creal ; initialize x == creal
mov dx,word ptr creal+2 ; ...
mov word ptr x,ax ; ...
mov word ptr x+2,dx ; ...
mov ax,word ptr cimag ; initialize y == cimag
mov dx,word ptr cimag+2 ; ...
mov word ptr y,ax ; ...
mov word ptr y+2,dx ; ...
mov ax,maxit ; setup k = maxit
inc ax ; (+ 1)
mov k,ax ; (decrementing to 0 is faster)
cmp fractype,1 ; julia or mandelbrot set?
je short dojulia ; julia set - go there
; (Tim wants this code changed so that, for the Mandelbrot,
; Z(1) = (x + iy) + (a + ib). Affects only "fudged" Mandelbrots.
; (for the "normal" case, a = b = 0, and this works, too)
; cmp word ptr x,0 ; Mandelbrot shortcut:
; jne short doeither ; if creal = cimag = 0,
; cmp word ptr x+2,0 ; the first iteration can be emulated.
; jne short doeither ; ...
; cmp word ptr y,0 ; ...
; jne short doeither ; ...
; cmp word ptr y+2,0 ; ...
; jne short doeither ; ...
; dec k ; we know the first iteration passed
; mov dx,word ptr linitx+2 ; copy x = linitx
; mov ax,word ptr linitx ; ...
; mov word ptr x+2,dx ; ...
; mov word ptr x,ax ; ...
; mov dx,word ptr linity+2 ; copy y = linity
; mov ax,word ptr linity ; ...
; mov word ptr y+2,dx ; ...
; mov word ptr y,ax ; ...
dec k ; we know the first iteration passed
mov dx,word ptr linitx+2 ; add x += linitx
mov ax,word ptr linitx ; ...
add word ptr x,ax ; ...
adc word ptr x+2,dx ; ...
mov dx,word ptr linity+2 ; add y += linity
mov ax,word ptr linity ; ...
add word ptr y,ax ; ...
adc word ptr y+2,dx ; ...
jmp short doeither ; branch around the julia switch
dojulia: ; Julia Set initialization
; "fudge" Mandelbrot start-up values
mov ax,word ptr x ; switch x with linitx
mov dx,word ptr x+2 ; ...
mov bx,word ptr linitx ; ...
mov cx,word ptr linitx+2 ; ...
mov word ptr x,bx ; ...
mov word ptr x+2,cx ; ...
mov word ptr linitx,ax ; ...
mov word ptr linitx+2,dx ; ...
mov ax,word ptr y ; switch y with linity
mov dx,word ptr y+2 ; ...
mov bx,word ptr linity ; ...
mov cx,word ptr linity+2 ; ...
mov word ptr y,bx ; ...
mov word ptr y+2,cx ; ...
mov word ptr linity,ax ; ...
mov word ptr linity+2,dx ; ...
doeither: ; common Mandelbrot, Julia set code
mov period,0 ; claim periodicity of 1
mov savedand,1 ; initial periodicity check
mov savedincr,1 ; flag for incrementing periodicity
mov word ptr savedx+2,0ffffh; impossible value of "old" x
mov word ptr savedy+2,0ffffh; impossible value of "old" y
mov orbit_ptr,0 ; clear orbits
dec kbdcount ; decrement the keyboard counter
jns short nokey ; skip keyboard test if still positive
mov kbdcount,10 ; stuff in a low kbd count
cmp show_orbit,0 ; are we showing orbits?
jne quickkbd ; yup. leave it that way.
mov kbdcount,5000 ; else, stuff an appropriate count val
cmp cpu,386 ; ("appropriate" to the CPU)
je short kbddiskadj ; ...
;; cmp word ptr delmin+2,1 ; is 16-bit math good enough?
cmp word ptr delmin+2,8 ; is 16-bit math good enough?
ja kbddiskadj ; yes. test less often
mov kbdcount,500 ; no. test more often
kbddiskadj:
cmp dotmode,11 ; disk video?
jne quickkbd ; no, leave as is
shr kbdcount,1 ; yes, reduce count
shr kbdcount,1 ; ...
quickkbd:
call far ptr keypressed ; has a key been pressed?
cmp ax,0 ; ...
je nokey ; nope. proceed
mov kbdcount,0 ; make sure it goes negative again
cmp ax,'o' ; orbit toggle hit?
je orbitkey ; yup. show orbits
cmp ax,'O' ; orbit toggle hit?
jne keyhit ; nope. normal key.
orbitkey:
call far ptr getakey ; read the key for real
mov ax,1 ; reset orbittoggle = 1 - orbittoggle
sub ax,show_orbit ; ...
mov show_orbit,ax ; ...
jmp short nokey ; pretend no key was hit
keyhit: mov ax,-1 ; return with -1
mov color,ax ; set color to -1
UNFRAME <si,di> ; pop stack frame
ret ; bail out!
nokey:
cmp show_orbit,0 ; is orbiting on?
jne no16bitcode ; yup. slow down.
cmp cpu,386 ; are we on a 386?
je short code386bit ; YAY!! 386-class speed!
;; cmp word ptr delmin+2,1 ; OK, we're desperate. 16 bits OK?
cmp word ptr delmin+2,8 ; OK, we're desperate. 16 bits OK?
ja yes16bitcode ; YAY! 16-bit speed!
no16bitcode:
call near ptr code32bit ; BOO!! nap time. Full 32 bit math
jmp kloopend ; bypass the 386-specific code.
yes16bitcode:
call near ptr code16bit ; invoke the 16-bit version
jmp kloopend ; bypass the 386-specific code.
.386 ; 386-specific code starts here
code386bit:
;; cmp word ptr delmin+2,3 ; is 16-bit math good enough?
cmp word ptr delmin+2,8 ; is 16-bit math good enough?
jbe code386_32 ; nope, go do 32 bit stuff
IFDEF ??version
jmp code386_32 ; TASM screws up IMUL EBX,EBX!!
ENDIF
; 16 bit on 386, now we are really gonna move
movsx esi,word ptr x+2 ; use SI for X
movsx edi,word ptr y+2 ; use DI for Y
push ebp
mov ebp,-1
shl ebp,FUDGEFACTOR-1
mov cx,FUDGEFACTOR-16
kloop386_16: ; cx=bitshift-16, ebp=overflow.mask
mov ebx,esi ; compute (x * x)
imul ebx,ebx ; ...
test ebx,ebp ;
jnz short end386_16 ; (oops. We done.)
shr ebx,cl ; get result down to 16 bits
mov edx,edi ; compute (y * y)
imul edx,edx ; ...
test edx,ebp ; say, did we overflow? <V20-compat>
jnz short end386_16 ; (oops. We done.)
shr edx,cl ; get result down to 16 bits
mov ax,bx ; compute (x*x - y*y) / fudge
sub bx,dx ; for the next iteration
add ax,dx ; compute (x*x + y*y) / fudge
cmp ax,word ptr lm+2 ; while (xx+yy < lm)
jae short end386_16 ; ...
imul edi,esi ; compute (y * x)
shl edi,1 ; ( * 2 / fudge)
sar edi,cl
add di,word ptr linity+2 ; (2*y*x) / fudge + linity
movsx edi,di ; save as y
add bx,word ptr linitx+2 ; (from above) (x*x - y*y)/fudge + linitx
movsx esi,bx ; save as x
mov ax,oldcolor ; recall the old color
cmp ax,k ; check it against this iter
jge short chkpd386_16 ; yup. do periodicity check.
nonmax386_16:
dec k ; while (k < maxit)
jnz short kloop386_16 ; try, try again
end386_16:
pop ebp
jmp kloopend ; we done
chkpd386_16:
mov ax,k ; set up to test for save-time
test ax,savedand ; save on 0, check on anything else
jz short chksv386_16 ; time to save a new "old" value
mov ax,si ; load up x
xor ax,word ptr savedx+2 ; does X match?
test ax,word ptr savedmask+2 ; truncate to appropriate precision
jne short nonmax386_16 ; nope. forget it.
mov ax,di ; now test y
xor ax,word ptr savedy+2 ; does Y match?
test ax,word ptr savedmask+2 ; truncate to appropriate precision
jne short nonmax386_16 ; nope. forget it.
mov period,1 ; note that we have found periodicity
mov k,0 ; pretend maxit reached
jmp short end386_16
chksv386_16:
mov word ptr savedx+2,si ; save x
mov word ptr savedy+2,di ; save y
dec savedincr ; time to change the periodicity?
jnz short nonmax386_16 ; nope.
shl savedand,1 ; well then, let's try this one!
inc savedand ; (2**n -1)
mov savedincr,4 ; and reset the increment flag
jmp short nonmax386_16
; 32bit on 386:
code386_32:
mov esi,x ; use ESI for X
mov edi,y ; use EDI for Y
; This is the main processing loop. Here, every T-state counts...
kloop: ; for (k = 0; k <= maxit; k++)
mov eax,esi ; compute (x * x)
imul esi ; ...
shrd eax,edx,FUDGEFACTOR ; ( / fudge)
shr edx,FUDGEFACTOR-1 ; (complete 64-bit shift and check
jne short kloopend1 ; bail out if too high
mov ebx,eax ; save this for below
mov eax,edi ; compute (y * y)
imul edi ; ...
shrd eax,edx,FUDGEFACTOR ; ( / fudge)
shr edx,FUDGEFACTOR-1 ; (complete 64-bit shift and check
jne short kloopend1 ; bail out if too high
mov ecx,ebx ; compute (x*x - y*y) / fudge
sub ebx,eax ; for the next iteration
add ecx,eax ; compute (x*x + y*y) / fudge
cmp ecx,lm ; while (lr < lm)
jae short kloopend1 ; ...
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -