⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 r_spr8.asm

📁 Quake 2 Source code for students by Theerthan You can also download from idsoftwares.com
💻 ASM
📖 第 1 页 / 共 2 页
字号:
 .386P
 .model FLAT
;
; d_spr8.s
; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
;

include qasm.inc
include d_if.inc

if id386

;----------------------------------------------------------------------
; 8-bpp horizontal span drawing code for polygons, with transparency.
;----------------------------------------------------------------------

_TEXT SEGMENT	

; out-of-line, rarely-needed clamping code
;
; Each stub below is the target of a conditional jump taken when an s or t
; texture coordinate falls outside its allowed range. A stub loads the
; clamped value into the register holding that coordinate and jumps back to
; the matching LClampReentryN label, so the in-range case falls straight
; through at the jump site without a taken branch.

; Stubs 0 and 1: clamp the span-start s (esi) and t (edx).
; They are entered through LClampHighOrLowN, which is reached by a JA after
; an unsigned compare of the coordinate against _bbextents / _bbextentt.
; Unsigned "above" covers two cases: a value that is too large, or a negative
; value. The flags from that compare are still intact on entry, so the signed
; JG below tells the two apart.

LClampHigh0:	
 mov esi,ds:dword ptr[_bbextents]	; s too large: clamp to the upper s bound
 jmp LClampReentry0	
LClampHighOrLow0:	
 jg LClampHigh0	; signed greater: genuinely above the bound
 xor esi,esi	; otherwise s was negative: clamp to 0
 jmp LClampReentry0	

LClampHigh1:	
 mov edx,ds:dword ptr[_bbextentt]	; t too large: clamp to the upper t bound
 jmp LClampReentry1	
LClampHighOrLow1:	
 jg LClampHigh1	; signed greater: genuinely above the bound
 xor edx,edx	; otherwise t was negative: clamp to 0
 jmp LClampReentry1	

; Stubs 2 and 3: clamp the segment-end s (ebp) and t (ecx).
; The jump site tests the low side first with a signed compare against 2048
; (JL -> LClampLowN) and then the high side with an unsigned compare against
; the extent (JA -> LClampHighN), so each stub handles exactly one case.

LClampLow2:	
 mov ebp,2048	; below the lower limit: force s to 2048
 jmp LClampReentry2	
LClampHigh2:	
 mov ebp,ds:dword ptr[_bbextents]	; above the upper limit: force s to the extent
 jmp LClampReentry2	

LClampLow3:	
 mov ecx,2048	; below the lower limit: force t to 2048
 jmp LClampReentry3	
LClampHigh3:	
 mov ecx,ds:dword ptr[_bbextentt]	; above the upper limit: force t to the extent
 jmp LClampReentry3	

; Stubs 4 and 5: same low/high pattern applied to eax (against _bbextents)
; and ebx (against _bbextentt).
; NOTE(review): the jump sites and LClampReentry4/5 labels are not in this
; part of the file; presumably they clamp s and t for the final, partial
; segment -- confirm against the rest of the routine.

LClampLow4:	
 mov eax,2048	
 jmp LClampReentry4	
LClampHigh4:	
 mov eax,ds:dword ptr[_bbextents]	
 jmp LClampReentry4	

LClampLow5:	
 mov ebx,2048	
 jmp LClampReentry5	
LClampHigh5:	
 mov ebx,ds:dword ptr[_bbextentt]	
 jmp LClampReentry5	


; Offset from esp to the routine's span-list pointer argument, valid once the
; prologue has pushed ebp, edi, esi and ebx (4 registers * 4 bytes = 16).
; The remaining 4 bytes are presumably the caller's return address.
; The value is only correct while exactly those four pushes are on the stack.
pspans	equ		4+16

 align 4	
 public _D_SpriteDrawSpansXXX
_D_SpriteDrawSpansXXX:	
 push ebp	; preserve caller's stack frame
 push edi	
 push esi	; preserve register variables
 push ebx	

;
; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
; and span list pointers, and 1/z step in 0.32 fixed-point
;
; FIXME: any overlap from rearranging?
 fld ds:dword ptr[_d_sdivzstepu]	
 fmul ds:dword ptr[fp_8]	
 mov edx,ds:dword ptr[_cacheblock]	
 fld ds:dword ptr[_d_tdivzstepu]	
 fmul ds:dword ptr[fp_8]	
 mov ebx,ds:dword ptr[pspans+esp]	; point to the first span descriptor
 fld ds:dword ptr[_d_zistepu]	
 fmul ds:dword ptr[fp_8]	
 mov ds:dword ptr[pbase],edx	; pbase = cacheblock
 fld ds:dword ptr[_d_zistepu]	
 fmul ds:dword ptr[fp_64kx64k]	
 fxch st(3)	
 fstp ds:dword ptr[sdivz8stepu]	
 fstp ds:dword ptr[zi8stepu]	
 fstp ds:dword ptr[tdivz8stepu]	
 fistp ds:dword ptr[izistep]	
 mov eax,ds:dword ptr[izistep]	
 ror eax,16	; put upper 16 bits in low word
 mov ecx,ds:dword ptr[sspan_t_count+ebx]	
 mov ds:dword ptr[izistep],eax	

 cmp ecx,0	
 jle LNextSpan	

LSpanLoop:	

;
; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
; initial s and t values
;
; FIXME: pipeline FILD?
 fild ds:dword ptr[sspan_t_v+ebx]	
 fild ds:dword ptr[sspan_t_u+ebx]	

 fld st(1)	; dv | du | dv
 fmul ds:dword ptr[_d_sdivzstepv]	; dv*d_sdivzstepv | du | dv
 fld st(1)	; du | dv*d_sdivzstepv | du | dv
 fmul ds:dword ptr[_d_sdivzstepu]	; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
 fld st(2)	; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
 fmul ds:dword ptr[_d_tdivzstepu]	; du*d_tdivzstepu | du*d_sdivzstepu |
;  dv*d_sdivzstepv | du | dv
 fxch st(1)	; du*d_sdivzstepu | du*d_tdivzstepu |
;  dv*d_sdivzstepv | du | dv
 faddp st(2),st(0)	; du*d_tdivzstepu |
;  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
 fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
;  du*d_tdivzstepu | du | dv
 fld st(3)	; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
;  du*d_tdivzstepu | du | dv
 fmul ds:dword ptr[_d_tdivzstepv]	; dv*d_tdivzstepv |
;  du*d_sdivzstepu + dv*d_sdivzstepv |
;  du*d_tdivzstepu | du | dv
 fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
;  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
 fadd ds:dword ptr[_d_sdivzorigin]	; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
;  du*d_sdivzstepu; stays in %st(2) at end
 fxch st(4)	; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
;  s/z
 fmul ds:dword ptr[_d_zistepv]	; dv*d_zistepv | dv*d_tdivzstepv |
;  du*d_tdivzstepu | du | s/z
 fxch st(1)	; dv*d_tdivzstepv |  dv*d_zistepv |
;  du*d_tdivzstepu | du | s/z
 faddp st(2),st(0)	; dv*d_zistepv |
;  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
 fxch st(2)	; du | dv*d_tdivzstepv + du*d_tdivzstepu |
;  dv*d_zistepv | s/z
 fmul ds:dword ptr[_d_zistepu]	; du*d_zistepu |
;  dv*d_tdivzstepv + du*d_tdivzstepu |
;  dv*d_zistepv | s/z
 fxch st(1)	; dv*d_tdivzstepv + du*d_tdivzstepu |
;  du*d_zistepu | dv*d_zistepv | s/z
 fadd ds:dword ptr[_d_tdivzorigin]	; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
;  du*d_tdivzstepu; stays in %st(1) at end
 fxch st(2)	; dv*d_zistepv | du*d_zistepu | t/z | s/z
 faddp st(1),st(0)	; dv*d_zistepv + du*d_zistepu | t/z | s/z

 fld ds:dword ptr[fp_64k]	; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
 fxch st(1)	; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
 fadd ds:dword ptr[_d_ziorigin]	; zi = d_ziorigin + dv*d_zistepv +
;  du*d_zistepu; stays in %st(0) at end
; 1/z | fp_64k | t/z | s/z

 fld st(0)	; FIXME: get rid of stall on FMUL?
 fmul ds:dword ptr[fp_64kx64k]	
 fxch st(1)	

;
; calculate and clamp s & t
;
 fdiv st(2),st(0)	; 1/z | z*64k | t/z | s/z
 fxch st(1)	

 fistp ds:dword ptr[izi]	; 0.32 fixed-point 1/z
 mov ebp,ds:dword ptr[izi]	

;
; set pz to point to the first z-buffer pixel in the span
;
 ror ebp,16	; put upper 16 bits in low word
 mov eax,ds:dword ptr[sspan_t_v+ebx]	
 mov ds:dword ptr[izi],ebp	
 mov ebp,ds:dword ptr[sspan_t_u+ebx]	
 imul ds:dword ptr[_d_zrowbytes]	
 shl ebp,1	; a word per pixel
 add eax,ds:dword ptr[_d_pzbuffer]	
 add eax,ebp	
 mov ds:dword ptr[pz],eax	

;
; point %edi to the first pixel in the span
;
 mov ebp,ds:dword ptr[_d_viewbuffer]	
 mov eax,ds:dword ptr[sspan_t_v+ebx]	
 push ebx	; preserve spans pointer
 mov edx,ds:dword ptr[_tadjust]	
 mov esi,ds:dword ptr[_sadjust]	
 mov edi,ds:dword ptr[_d_scantable+eax*4]	; v * screenwidth
 add edi,ebp	
 mov ebp,ds:dword ptr[sspan_t_u+ebx]	
 add edi,ebp	; pdest = &pdestspan[scans->u];

;
; now start the FDIV for the end of the span
;
 cmp ecx,8	
 ja LSetupNotLast1	

 dec ecx	
 jz LCleanup1	; if only one pixel, no need to start an FDIV
 mov ds:dword ptr[spancountminus1],ecx	

; finish up the s and t calcs
 fxch st(1)	; z*64k | 1/z | t/z | s/z

 fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
 fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
 fxch st(1)	; z*64k | s | 1/z | t/z | s/z
 fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
 fxch st(1)	; s | t | 1/z | t/z | s/z
 fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
 fistp ds:dword ptr[t]	; 1/z | t/z | s/z

 fild ds:dword ptr[spancountminus1]	

 fld ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | spancountminus1
 fld ds:dword ptr[_d_zistepu]	; _d_zistepu | _d_tdivzstepu | spancountminus1
 fmul st(0),st(2)	; _d_zistepu*scm1 | _d_tdivzstepu | scm1
 fxch st(1)	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
 fmul st(0),st(2)	; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
 fxch st(2)	; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
 fmul ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
;  _d_tdivzstepu*scm1
 fxch st(1)	; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
;  _d_tdivzstepu*scm1
 faddp st(3),st(0)	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
 fxch st(1)	; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
 faddp st(3),st(0)	; _d_sdivzstepu*scm1
 faddp st(3),st(0)	

 fld ds:dword ptr[fp_64k]	
 fdiv st(0),st(1)	; this is what we've gone to all this trouble to
;  overlap
 jmp LFDIVInFlight1	

LCleanup1:	
; finish up the s and t calcs
 fxch st(1)	; z*64k | 1/z | t/z | s/z

 fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
 fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
 fxch st(1)	; z*64k | s | 1/z | t/z | s/z
 fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
 fxch st(1)	; s | t | 1/z | t/z | s/z
 fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
 fistp ds:dword ptr[t]	; 1/z | t/z | s/z
 jmp LFDIVInFlight1	

 align 4	
LSetupNotLast1:	
; finish up the s and t calcs
 fxch st(1)	; z*64k | 1/z | t/z | s/z

 fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
 fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
 fxch st(1)	; z*64k | s | 1/z | t/z | s/z
 fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
 fxch st(1)	; s | t | 1/z | t/z | s/z
 fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
 fistp ds:dword ptr[t]	; 1/z | t/z | s/z

 fadd ds:dword ptr[zi8stepu]	
 fxch st(2)	
 fadd ds:dword ptr[sdivz8stepu]	
 fxch st(2)	
 fld ds:dword ptr[tdivz8stepu]	
 faddp st(2),st(0)	
 fld ds:dword ptr[fp_64k]	
 fdiv st(0),st(1)	; z = 1/1/z
; this is what we've gone to all this trouble to
;  overlap
LFDIVInFlight1:	

 add esi,ds:dword ptr[s]	
 add edx,ds:dword ptr[t]	
 mov ebx,ds:dword ptr[_bbextents]	
 mov ebp,ds:dword ptr[_bbextentt]	
 cmp esi,ebx	
 ja LClampHighOrLow0	
LClampReentry0:	
 mov ds:dword ptr[s],esi	
 mov ebx,ds:dword ptr[pbase]	
 shl esi,16	
 cmp edx,ebp	
 mov ds:dword ptr[sfracf],esi	
 ja LClampHighOrLow1	
LClampReentry1:	
 mov ds:dword ptr[t],edx	
 mov esi,ds:dword ptr[s]	; sfrac = scans->sfrac;
 shl edx,16	
 mov eax,ds:dword ptr[t]	; tfrac = scans->tfrac;
 sar esi,16	
 mov ds:dword ptr[tfracf],edx	

;
; calculate the texture starting address
;
 sar eax,16	
 add esi,ebx	
 imul eax,ds:dword ptr[_cachewidth]	; (tfrac >> 16) * cachewidth
 add esi,eax	; psource = pbase + (sfrac >> 16) +
;           ((tfrac >> 16) * cachewidth);

;
; determine whether last span or not
;
 cmp ecx,8	
 jna LLastSegment	

;
; not the last segment; do full 8-wide segment
;
LNotLastSegment:	

;
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
; get there
;

; pick up after the FDIV that was left in flight previously

 fld st(0)	; duplicate it
 fmul st(0),st(4)	; s = s/z * z
 fxch st(1)	
 fmul st(0),st(3)	; t = t/z * z
 fxch st(1)	
 fistp ds:dword ptr[snext]	
 fistp ds:dword ptr[tnext]	
 mov eax,ds:dword ptr[snext]	
 mov edx,ds:dword ptr[tnext]	

 sub ecx,8	; count off this segments' pixels
 mov ebp,ds:dword ptr[_sadjust]	
 push ecx	; remember count of remaining pixels
 mov ecx,ds:dword ptr[_tadjust]	

 add ebp,eax	
 add ecx,edx	

 mov eax,ds:dword ptr[_bbextents]	
 mov edx,ds:dword ptr[_bbextentt]	

 cmp ebp,2048	
 jl LClampLow2	
 cmp ebp,eax	
 ja LClampHigh2	
LClampReentry2:	

 cmp ecx,2048	
 jl LClampLow3	
 cmp ecx,edx	
 ja LClampHigh3	
LClampReentry3:	

 mov ds:dword ptr[snext],ebp	
 mov ds:dword ptr[tnext],ecx	

 sub ebp,ds:dword ptr[s]	
 sub ecx,ds:dword ptr[t]	

;
; set up advancetable
;
 mov eax,ecx	
 mov edx,ebp	
 sar edx,19	; sstep >>= 16;
 mov ebx,ds:dword ptr[_cachewidth]	
 sar eax,19	; tstep >>= 16;
 jz LIsZero	
 imul eax,ebx	; (tstep >> 16) * cachewidth;
LIsZero:	
 add eax,edx	; add in sstep
; (tstep >> 16) * cachewidth + (sstep >> 16);
 mov edx,ds:dword ptr[tfracf]	
 mov ds:dword ptr[advancetable+4],eax	; advance base in t
 add eax,ebx	; ((tstep >> 16) + 1) * cachewidth +
;  (sstep >> 16);
 shl ebp,13	; left-justify sstep fractional part
 mov ds:dword ptr[sstep],ebp	
 mov ebx,ds:dword ptr[sfracf]	
 shl ecx,13	; left-justify tstep fractional part
 mov ds:dword ptr[advancetable],eax	; advance extra in t
 mov ds:dword ptr[tstep],ecx	

 mov ecx,ds:dword ptr[pz]	
 mov ebp,ds:dword ptr[izi]	

 cmp bp,ds:word ptr[ecx]	
 jl Lp1	
 mov al,ds:byte ptr[esi]	; get first source texel
 cmp al,offset TRANSPARENT_COLOR	
 jz Lp1	
 mov ds:word ptr[ecx],bp	
 mov ds:byte ptr[edi],al	; store first dest pixel
Lp1:	
 add ebp,ds:dword ptr[izistep]	
 adc ebp,0	
 add edx,ds:dword ptr[tstep]	; advance tfrac fractional part by tstep frac

 sbb eax,eax	; turn tstep carry into -1 (0 if none)
 add ebx,ds:dword ptr[sstep]	; advance sfrac fractional part by sstep frac
 adc esi,ds:dword ptr[advancetable+4+eax*4]	; point to next source texel

 cmp bp,ds:word ptr[2+ecx]	
 jl Lp2	
 mov al,ds:byte ptr[esi]	
 cmp al,offset TRANSPARENT_COLOR	
 jz Lp2	
 mov ds:word ptr[2+ecx],bp	
 mov ds:byte ptr[1+edi],al	
Lp2:	
 add ebp,ds:dword ptr[izistep]	
 adc ebp,0	
 add edx,ds:dword ptr[tstep]	
 sbb eax,eax	
 add ebx,ds:dword ptr[sstep]	
 adc esi,ds:dword ptr[advancetable+4+eax*4]	

 cmp bp,ds:word ptr[4+ecx]	
 jl Lp3	
 mov al,ds:byte ptr[esi]	
 cmp al,offset TRANSPARENT_COLOR	
 jz Lp3	
 mov ds:word ptr[4+ecx],bp	
 mov ds:byte ptr[2+edi],al	
Lp3:	
 add ebp,ds:dword ptr[izistep]	
 adc ebp,0	
 add edx,ds:dword ptr[tstep]	
 sbb eax,eax	
 add ebx,ds:dword ptr[sstep]	
 adc esi,ds:dword ptr[advancetable+4+eax*4]	

 cmp bp,ds:word ptr[6+ecx]	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -