; r_draw16.asm
; (excerpt; the "font size" label that followed the file name was code-viewer page residue)
; Continuation of an unrolled texel-copy sequence that begins above this
; excerpt; this part stores destination bytes 4..7 relative to edi.
;
; Registers as used here:
;   esi = source texel pointer       edi = destination pointer
;   edx = t fraction accumulator     ebx = s fraction accumulator
;   ebp = s fraction step            al  = texel in transit
;
; Per-pixel pattern (mov does not modify flags, so each carry reaches its
; consumer even though loads/stores are interleaved):
;   add edx,[tstep]   CF = t fraction wrapped
;   sbb ecx,ecx       ecx = 0 (no wrap) or -1 (wrap)
;   add ebx,ebp       CF = s fraction wrapped
;   adc esi,[advancetable+4+ecx*4]
;                     ecx = 0 selects advancetable+4, ecx = -1 selects
;                     advancetable+0; CF adds one more byte to esi
sbb ecx,ecx ; consumes CF left by an instruction above this excerpt
mov ds:byte ptr[4+edi],al ; store the texel fetched earlier
add ebx,ebp ; step s fraction
mov al,ds:byte ptr[esi] ; fetch next texel (flags untouched)
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; step t fraction
sbb ecx,ecx
mov ds:byte ptr[5+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[6+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[7+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi] ; this texel is stored to [8+edi] after the FDIV setup
adc esi,ds:dword ptr[advancetable+4+ecx*4]
;
; start FDIV for end of next segment in flight, so it can overlap
;
; ecx is free here (its 0/-1 selector was just consumed), so it is reused for
; the pixel count. If more than 16 remain, LSetupNotLast2 handles the setup.
; Otherwise the count is decremented (ecx = count-1) and, unless that is zero,
; the three FPU accumulators are advanced by (count-1) steps and 64k/st(1) is
; started. The decremented ecx is what LFDIVInFlight2 writes back to counttemp.
;
; The stack-state comments below list only the values pushed by this sequence.
; The faddp targets st(3)/st(4) are deeper stack entries that were already on
; the FPU stack when this code was reached.
; NOTE(review): from the step-variable names these look like 1/z, t/z and s/z
; in st(0), st(1), st(2) on entry -- confirm against the code above this excerpt.
mov ecx,ds:dword ptr[counttemp]
cmp ecx,16 ; more than one segment after this?
ja LSetupNotLast2 ; yes
dec ecx ; ecx = count-1
jz LFDIVInFlight2 ; if only one pixel, no need to start an FDIV
mov ds:dword ptr[spancountminus1],ecx ; spill so fild can load it from memory
fild ds:dword ptr[spancountminus1]
fld ds:dword ptr[_d_zistepu] ; C(d_zistepu) | spancountminus1
fmul st(0),st(1) ; C(d_zistepu)*scm1 | scm1
fld ds:dword ptr[_d_tdivzstepu] ; C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
fmul st(0),st(2) ; C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
fxch st(1) ; C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
faddp st(3),st(0) ; C(d_tdivzstepu)*scm1 | scm1
fxch st(1) ; scm1 | C(d_tdivzstepu)*scm1
fmul ds:dword ptr[_d_sdivzstepu] ; C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
fxch st(1) ; C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
faddp st(3),st(0) ; C(d_sdivzstepu)*scm1
fld ds:dword ptr[fp_64k] ; 64k | C(d_sdivzstepu)*scm1
fxch st(1) ; C(d_sdivzstepu)*scm1 | 64k
faddp st(4),st(0) ; 64k
fdiv st(0),st(1) ; this is what we've gone to all this trouble to
; overlap
jmp LFDIVInFlight2 ; skip the not-last-segment setup
align 4
LSetupNotLast2:
; Reached when more than 16 pixels remain (ja from the counttemp check).
; Adds the three *16stepu values to the top three FPU stack entries, then
; pushes fp_64k and starts 64k / st(1). The divide result is not used in this
; block; the integer code that follows runs while it is in flight.
; ecx still holds the undecremented count loaded from counttemp.
; NOTE(review): from the variable names the entries are presumably
; st(0)=1/z, st(1)=t/z, st(2)=s/z on entry -- confirm with the code above.
fadd ds:dword ptr[zi16stepu] ; st(0) += zi16stepu
fxch st(2) ; bring the third entry to the top
fadd ds:dword ptr[sdivz16stepu] ; third entry += sdivz16stepu
fxch st(2) ; restore the original order
fld ds:dword ptr[tdivz16stepu] ; push step; former st(1) is now st(2)
faddp st(2),st(0) ; former st(1) += tdivz16stepu, pop
fld ds:dword ptr[fp_64k]
fdiv st(0),st(1) ; z = 1/1/z
; this is what we've gone to all this trouble to
; overlap
LFDIVInFlight2:
; Common continuation after the FDIV setup. Writes ecx back to counttemp
; (count-1 if the decrement path was taken, otherwise the value loaded from
; counttemp unchanged), then resumes the unrolled copy for destination
; bytes 8..15, using the same add/sbb/add/adc carry pattern as bytes 4..7.
; Afterwards it advances edi by 16, saves the fraction accumulators, promotes
; snext/tnext to s/t, and loops to LNotLastSegment while more than 16 remain.
; Otherwise execution falls through into LLastSegment.
mov ds:dword ptr[counttemp],ecx
add edx,ds:dword ptr[tstep] ; step t fraction; CF = wrapped
sbb ecx,ecx ; ecx = 0 or -1 from that carry
mov ds:byte ptr[8+edi],al ; texel fetched before the FDIV setup
add ebx,ebp ; step s fraction; CF = wrapped
mov al,ds:byte ptr[esi] ; fetch next texel (flags untouched)
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[9+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[10+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[11+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[12+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[13+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edx,ds:dword ptr[tstep]
sbb ecx,ecx
mov ds:byte ptr[14+edi],al
add ebx,ebp
mov al,ds:byte ptr[esi] ; texel for byte 15, stored below as [-1+edi]
adc esi,ds:dword ptr[advancetable+4+ecx*4]
add edi,16 ; advance destination past this 16-byte run
mov ds:dword ptr[tfracf],edx ; save t fraction accumulator
mov edx,ds:dword ptr[snext]
mov ds:dword ptr[sfracf],ebx ; save s fraction accumulator
mov ebx,ds:dword ptr[tnext]
mov ds:dword ptr[s],edx ; s = snext
mov ds:dword ptr[t],ebx ; t = tnext
mov ecx,ds:dword ptr[counttemp] ; retrieve count
;
; determine whether last span or not
;
cmp ecx,16 ; are there multiple segments remaining?
mov ds:byte ptr[-1+edi],al ; byte 15 of the run just finished (edi already +16);
; mov leaves the cmp flags intact for the ja below
ja LNotLastSegment ; yes
;
; last segment of scan
;
LLastSegment:
;
; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
; get there. The number of pixels left is variable, and we want to land on the
; last pixel, not step one past it, so we can't run into arithmetic problems
;
; Entered by fall-through with ecx = value stored in counttemp (count-1 when
; the decrement path of the FDIV setup was taken). Produces sstep in ebp and
; tstep in edx, then falls through into LSetEntryvec.
; LClampLow4/LClampHigh4/LClampLow5/LClampHigh5 are defined outside this
; excerpt; presumably they clamp eax/ebx and jump back to the matching
; LClampReentry label -- confirm.
test ecx,ecx
jz LNoSteps ; just draw the last pixel and we're done
; pick up after the FDIV that was left in flight previously
fld st(0) ; duplicate it
fmul st(0),st(4) ; s = s/z * z
fxch st(1)
fmul st(0),st(3) ; t = t/z * z
fxch st(1)
fistp ds:dword ptr[snext] ; store s as integer, pop
fistp ds:dword ptr[tnext] ; store t as integer, pop
mov al,ds:byte ptr[esi] ; load first texel in segment
mov ebx,ds:dword ptr[_tadjust]
mov ds:byte ptr[edi],al ; store first pixel in segment
mov eax,ds:dword ptr[_sadjust]
add eax,ds:dword ptr[snext] ; eax = snext + sadjust
add ebx,ds:dword ptr[tnext] ; ebx = tnext + tadjust
mov ebp,ds:dword ptr[_bbextents] ; upper bound for s
mov edx,ds:dword ptr[_bbextentt] ; upper bound for t
cmp eax,4096
jl LClampLow4 ; signed: s below 4096 (including negative)
cmp eax,ebp
ja LClampHigh4 ; s above _bbextents
LClampReentry4:
mov ds:dword ptr[snext],eax ; write the adjusted/clamped s back
cmp ebx,4096
jl LClampLow5 ; signed: t below 4096 (including negative)
cmp ebx,edx
ja LClampHigh5 ; t above _bbextentt
LClampReentry5:
cmp ecx,1 ; don't bother
je LOnlyOneStep ; if two pixels in segment, there's only one step,
; of the segment length
sub eax,ds:dword ptr[s] ; eax = snext - s
sub ebx,ds:dword ptr[t] ; ebx = tnext - t
add eax,eax ; convert to 15.17 format so multiply by 1.31
add ebx,ebx ; reciprocal yields 16.48
; One-operand imul: edx:eax = eax * operand; the high dword in edx is kept.
; ecx >= 2 here, so the -8 displacement makes ecx=2 select the table's first
; dword (table contents are defined outside this excerpt).
imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; sstep = (snext - s) /
; (spancount-1)
mov ebp,edx ; ebp = sstep (high dword of the product)
mov eax,ebx ; set up the t delta as the next multiplicand
imul ds:dword ptr[reciprocal_table_16-8+ecx*4] ; tstep = (tnext - t) /
; (spancount-1)
LSetEntryvec:
;
; set up advancetable
;
; On entry (from both the fall-through path and LOnlyOneStep):
;   ecx = index into entryvec_table_16, ebp = sstep, edx = tstep,
;   esi = source texel pointer.
; Builds the two advancetable entries, left-justifies the fractional steps,
; takes the first s/t step, and jumps through the selected table entry.
; On exit: eax = left-justified t fraction step (same value as [tstep]),
;   ebp = left-justified s fraction step, edx/ebx = stepped t/s fractions,
;   CF-derived selector already applied to esi.
mov ebx,ds:dword ptr[entryvec_table_16+ecx*4]
mov eax,edx ; keep a copy of tstep for the fraction below
mov ds:dword ptr[jumptemp],ebx ; entry point into code for RET later
; (consumed by the indirect jmp at the end of this block)
mov ecx,ebp ; copy of sstep for the integer part
sar edx,16 ; tstep >>= 16;
mov ebx,ds:dword ptr[_cachewidth]
sar ecx,16 ; sstep >>= 16;
imul edx,ebx
add edx,ecx ; add in sstep
; (tstep >> 16) * cachewidth + (sstep >> 16);
mov ecx,ds:dword ptr[tfracf]
mov ds:dword ptr[advancetable+4],edx ; advance base in t
add edx,ebx ; ((tstep >> 16) + 1) * cachewidth +
; (sstep >> 16);
shl ebp,16 ; left-justify sstep fractional part
mov ebx,ds:dword ptr[sfracf]
shl eax,16 ; left-justify tstep fractional part
mov ds:dword ptr[advancetable],edx ; advance extra in t
mov ds:dword ptr[tstep],eax
mov edx,ecx ; edx = tfracf
add edx,eax ; first t step; CF = fraction wrapped
sbb ecx,ecx ; ecx = 0 or -1 from that carry
add ebx,ebp ; first s step; CF = fraction wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
jmp dword ptr[jumptemp] ; jump to the number-of-pixels handler
;----------------------------------------
LNoSteps:
; Reached from LLastSegment when ecx is zero: only one pixel remains, so no
; s/t stepping is set up. Loads that texel and hands off to LEndSpan (defined
; outside this excerpt).
mov al,ds:byte ptr[esi] ; load first texel in segment
sub edi,15 ; adjust for hardwired offset
; NOTE(review): presumably LEndSpan stores at [15+edi] -- confirm
jmp LEndSpan
LOnlyOneStep:
; Reached from LClampReentry5 when ecx == 1 (two pixels, one step). The step
; is simply the whole delta, so the reciprocal-table multiply is skipped.
; Leaves sstep in ebp and tstep in edx, the registers LSetEntryvec reads.
sub eax,ds:dword ptr[s] ; eax = s at end of segment - s
sub ebx,ds:dword ptr[t] ; ebx = t at end of segment - t
mov ebp,eax ; sstep
mov edx,ebx ; tstep
jmp LSetEntryvec
;----------------------------------------
public Entry2_16, Entry3_16, Entry4_16, Entry5_16
public Entry6_16, Entry7_16, Entry8_16, Entry9_16
public Entry10_16, Entry11_16, Entry12_16, Entry13_16
public Entry14_16, Entry15_16, Entry16_16
Entry2_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; No further s/t stepping is done here: it only loads the texel at esi and
; continues at LEntry2_16 (outside this excerpt).
sub edi,14 ; adjust for hardwired offsets
mov al,ds:byte ptr[esi] ; texel for the code at LEntry2_16 to store
jmp LEntry2_16
;----------------------------------------
Entry3_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, then continues at LEntry3_16
; (outside this excerpt).
sub edi,13 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
jmp LEntry3_16
;----------------------------------------
Entry4_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry4_16 (outside this excerpt).
sub edi,12 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry4_16 -- confirm
jmp LEntry4_16
;----------------------------------------
Entry5_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry5_16 (outside this excerpt).
sub edi,11 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry5_16 -- confirm
jmp LEntry5_16
;----------------------------------------
Entry6_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry6_16 (outside this excerpt).
sub edi,10 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry6_16 -- confirm
jmp LEntry6_16
;----------------------------------------
Entry7_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry7_16 (outside this excerpt).
sub edi,9 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry7_16 -- confirm
jmp LEntry7_16
;----------------------------------------
Entry8_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry8_16 (outside this excerpt).
sub edi,8 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry8_16 -- confirm
jmp LEntry8_16
;----------------------------------------
Entry9_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry9_16 (outside this excerpt).
sub edi,7 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry9_16 -- confirm
jmp LEntry9_16
;----------------------------------------
Entry10_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry10_16 (outside this excerpt).
sub edi,6 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry10_16 -- confirm
jmp LEntry10_16
;----------------------------------------
Entry11_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry11_16 (outside this excerpt).
sub edi,5 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry11_16 -- confirm
jmp LEntry11_16
;----------------------------------------
Entry12_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry12_16 (outside this excerpt).
sub edi,4 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry12_16 -- confirm
jmp LEntry12_16
;----------------------------------------
Entry13_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry13_16 (outside this excerpt).
sub edi,3 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry13_16 -- confirm
jmp LEntry13_16
;----------------------------------------
Entry14_16:
; Entry stub; presumably reached through entryvec_table_16 via the indirect
; jmp in LSetEntryvec (table defined outside this excerpt -- confirm).
; Loads one texel, takes one complete s/t step, starts the next t step, then
; continues at LEntry14_16 (outside this excerpt).
sub edi,2 ; adjust for hardwired offsets
add edx,eax ; step t fraction; eax still holds the left-justified t step
; set up by LSetEntryvec. CF = fraction wrapped
mov al,ds:byte ptr[esi] ; fetch texel (overwrites low byte of eax; flags untouched)
sbb ecx,ecx ; ecx = 0 or -1 from the t carry
add ebx,ebp ; step s fraction; CF = wrapped
adc esi,ds:dword ptr[advancetable+4+ecx*4] ; advance source pointer
add edx,ds:dword ptr[tstep] ; next t step, from memory because al was overwritten;
; its carry is presumably consumed at LEntry14_16 -- confirm
jmp LEntry14_16
;----------------------------------------
Entry15_16:
dec edi ; adjust for hardwired offsets
add edx,eax
mov al,ds:byte ptr[esi]
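; NOTE: the listing is cut off here; Entry15_16 above is incomplete (unlike the
; preceding entry stubs, it has no sbb/add/adc/jmp tail in this excerpt).
; The text that followed was code-viewer page residue, not assembly source:
; a keyboard-shortcut help panel (copy code: Ctrl + C, search code: Ctrl + F,
; full screen: F11, toggle theme: Ctrl + Shift + D, show shortcuts: ?,
; increase font size: Ctrl + =, decrease font size: Ctrl + -).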