📄 hxfassemblearm.s
字号:
tst r8, #HXF_PA_CULLDIR_CW ; if 0, CCW; 1, CW
; The only difference in computation is really which term (Ax or Ay) gets negated.
; CW: formula Nz.EyeZ = (AxBy - AyBx ) . [0,0,-1]
; CW: = (AyBx - AxBy)
; CCW: formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
; CCW: = (AxBy - AyBx)
subne r11, r7, r2 ; CW: -Ax = -(bx - ax ) = ax - bx
subeq r11, r2, r7 ; CCW: Ax = bx - ax ;
ldr r2, [r6] ; cx
subne r12, r3, r1 ; CW: Ay = by - ay
subeq r12, r1, r3 ; CCW: -Ay = -(by - ay) = ay -by ;
ldr r3, [r6,#4] ; cy
; r7 = ax
; r1 = ay
; r2 = cx
; r3 = cy
; r11 = -/+Ax (CW/CCW)
; r12 = +/-Ay
sub r7, r2, r7 ; Bx = cx - ax
sub r1, r3, r1 ; By = cy - ay
; r7 = Bx
; r1 = By
; r11 = -/+Ax (CW/CCW)
; r12 = +/-Ay
; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything else,
; as camera in projected space is at the origin)
smull r3, r2, r11, r1 ; AxBy in 64bits of r3r12
smlals r3, r2, r12, r7 ; add -AyBx in 64 bits to r3r12, need middle 32
; r2 = 32.0 of Nz.EyeZ
; r3 = 0.32 of Nz.EyeZ
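; Since we only care about the sign, only the hi word (r2) matters; smlals has
; already set N from bit 63 of the 64-bit result, so no separate compare is needed.
; NOTE(review): blt tests N != V, and smlals does not produce a meaningful V;
; bmi would test the sign alone - confirm V is known to be clear here.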
blt ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW
; Draw - gets here from cull disabled, cull pass and clip disabled, or cull/clip pass.
ARM_ASSEMBLE_IDX_TRISTRIP_DRAW
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; bDraw implicitly 1
; check HXF_PA_IN_PRIMITIVE; if 0, call Startlist
; ALWAYS call DrawVertex
tst r8, #HXF_PA_IN_PRIMITIVE ; if 0, call startlist and set to true
bne ARM_ASSEMBLE_IDX_TRISTRIP_STARTDRAW
; Call HXFState* HXFBeginPrimitiveList(HXFState*, void* pVtx0, void* pVtx1);
orr r8, r8, #HXF_PA_IN_PRIMITIVE ; set to 1 in local flags
mov r1, r4 ; vtx0
mov r2, r5 ; vtx1
and r3, r8, #HXF_PA_CULLDIR_CW ; send in cull direction!
add r3, r3, #1 ; plus 1
bl |ctlBeginPrimitiveList|
; Now, do compulsory DrawVertex
ARM_ASSEMBLE_IDX_TRISTRIP_STARTDRAW
;Call HXFState* pState->pSPProc(HXFState* pRasterState, void* vtx1, void* vtx2, void* vtx3);
;ldr r12, [r0, #HXFSTATE_OFFSET_PSPPROC]
mov r1, r6 ; load vtx2
mov r2, #0
mov r3, #0
;mov lr, pc
;mov pc, r12
bl |ctlVertex|
b ARM_ASSEMBLE_IDX_TRISTRIP_LOOPBACK ; Go and see if we're done!
ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW
; exit point for any failed bDraw checks
; since bDraw is false, check to see if HXF_PA_IN_PRIMITIVE was true.
; if HXF_PA_IN_PRIMITIVE is true, call Endlist and clear the HXF_PA_IN_PRIMITIVE flag
tst r8, #HXF_PA_IN_PRIMITIVE
bicne r8, r8, #HXF_PA_IN_PRIMITIVE
blne |ctlEndPrimitiveList|
; Now, check to see if the clipped triangle buffer is full.
; If it is (or close), flush it.
ldr r12, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
ldr r11, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDPRIMITIVES]
rsbs r12, r12, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_VERTEX_BUFFER_PAD)
rsbgts r11, r11, #(HXF_CLIP_PRIMITIVE_SPACE - HXF_CLIP_PRIMITIVE_BUFFER_PAD)
blle |HXFDrawClippedTriangles| ; Flush the clip buffers
; check to see if loop is complete!
ARM_ASSEMBLE_IDX_TRISTRIP_LOOPBACK
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
ldr r12, [r0, #HXFSTATE_OFFSET_NUMPRIMITIVES] ; count target
ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE] ; stride
ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES] ; Point to output list
mov r4, r5 ; shift vertex down
mov r5, r6 ; shift vertex down
eor r8, r8, #HXF_PA_CULLDIR_CW ; flip cull direction
add r9, r9, #2 ; increment to first index of next triangle
add r8, r8, #1 ; increment count of primitives
bic r1, r8, #0xFF<<24 ; clear out high bits!
cmp r1, r12 ; check to see if at end of loop
blt ARM_ASSEMBLE_IDX_TRISTRIP_LOOP ; no?
; yes, so quit....
ARM_ASSEMBLE_IDX_TRISTRIP_DONE ; Exit the whole shebang....
ldr r5, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
; first, if HXF_PA_IN_PRIMITIVE, then call EndList
tst r8, #HXF_PA_IN_PRIMITIVE
blne |ctlEndPrimitiveList|
; Next, if any remaining clipped vertices, draw them
cmp r5, #0
blgt |HXFDrawClippedTriangles| ; Flush the clip buffers
ldmfd sp!, {r4-r11, pc}
ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFAssembleIndexedTriFan
; Description: This draws an indexed TRIFAN.
; This version changes the indexing mode from Strips to Fans.
; The current index points to the 2nd vertex of the current triangle.
; The count simply is the number of primitives drawn thus far.
; Plan:
; Here's a layout of what we need to do:
; Init - parameters, variables, and conditional stuff
; Based on the cullFace direction and clip enabled, we'll branch to the appropriate
; parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.
; Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register
; Loop (CCW, CW, and no cull, and clipping)
; Submit a triangle for the cull & clip tests (if enabled)
; Note: Complex clipped triangles will be handled directly by the Clip test
; DRAW: If passes both test, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1
; But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
; NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
; if HXF_PA_IN_PRIMITIVE was 1, call EndList
; if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
; to be flushed.
; reset for next loop; flip cullFace direction, move pointers back
; End
; If HXF_PA_IN_PRIMITIVE, call Endlist.
; flush ClippedTriangleBuffer if needed
; Input Arguments: pState in r0
; Output Argument: none
; Prototype in C: void HXFAssembleIndexedTriFan(HXFState* pState);
;** ************************************************************************ **
|HXFAssembleIndexedTriFan| PROC
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; r1-r3, r7, r10, and r12 are free for use.
; r11 is a special case, where we must do stuff with it
stmfd sp!, {r4-r11,lr} ; stack push!
ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
mov r1, #0 ; use as loop counter up to NumPrimitives
; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE
ands r12, r12, #HXF_CULL_MASK ; mask out all but the Culling bits
orrne r1, r1, #HXF_PA_CULL_ENABLE ; set to 1 if enabled
cmp r12, #HXF_CULL_CW ; if HXF_CULL_CW, then start with CW iteration
orreq r1, r1, #HXF_PA_CULLDIR_CW ; set to 1 for CW, 0 for CCW
ldr r2, [r0, #HXFSTATE_OFFSET_PINDICES] ; point to index list
; which is always incremented sequentially
mov r8, r1 ; send in the count & flags..
str r8, [r0, #HXFSTATE_OFFSET_STORAGE_LR] ; Store Cull info for clipper
ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE] ; stride
ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES] ; Point to output list
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX] ; Load Base Index
ldrh r4, [r2, #0] ; first index
ldrh r5, [r2, #2]! ; second index - NOTE -- auto increment!!!!!
sub r4, r4, r12 ; Account for Base index
sub r5, r5, r12 ;
mul r12, r4, r7 ; multiply by stride
add r4, r10, r12 ; first vertex pointer
mul r12, r5, r7 ; multiply by stride
add r5, r10, r12 ; second vertex pointer
mov r9, r2
; The only difference between CULL_CW and CULL_CCW is the
; order of subtraction during vector calculation, so we went in and used
; some local bits to do it, rather than maintain 3 different loops.
; Furthermore, I will use a conditional to skip over culling entirely to keep
; just a single loop. If we need more performance due to branching penalties,
; then we can unroll things.
; In the C code, we store both the indices and the vertex pointer.
; In ARM, we'll only store the vertex pointer; we always know that
; we can find the index at count, count - 1, and count - 2 in the
; index list when we need them.
ARM_ASSEMBLE_IDX_TRIFAN_LOOP
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = Stride r11 = r15 = pc
; ---------------------------------------------------------------------- --
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
; Prefetch
pld [r9, #HXF_PA_IDX_PREFETCH_STRIP] ; prefetch the indices ahead
ldrh r1, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_STRIP] ; get point c's index
sub r1, r1, r12 ; Account for base index
mul r14, r1, r7 ; multiply by stride
add r1, r10, r14 ; third vertex pointer
pld [r1] ; prefetch vertex
; Load New Vertex
ldrh r6, [r9, #2] ; third index
sub r6, r6, r12 ; Account for base index
mul r12, r6, r7 ; multiply by stride
add r6, r10, r12 ; third vertex pointer
ARM_ASSEMBLE_IDX_TRIFAN_CLIP
; ---------------------------------------------------------------------- --
; Register Map - Clipping
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = k1* r5 = pVtxB r9 = pIndices r13 = sp
; r2 = k2* r6 = pVtxC r10 = pOutVtx r14 =
; r3 = k3* r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; Load Clip flags
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
ldr r1, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS] ; pointer to clip flag array
ldrh r2, [r9, #0] ; get point b's index
ldrh r3, [r9, #2] ; get point c's index
sub r1, r1, r12 ; Account for base index
sub r2, r2, r12
sub r3, r3, r12
add r2, r1, r2 ; for k1, k2, k3
add r3, r1, r3
ldrb r7, [r1] ; k1
ldrb r11, [r2] ; k2
ldrb r12, [r3] ; k3
; logic:
; k1, k2, k3 are clip flags per vertex
; if ((k1&k2) && (k2&k3) && (k3&k1)) != 0 means totally outside (all segments totally outside)
; if (k1|k2|k3) == 0 means no clipping necessary (totally in)
; else clip it.
; if all results are a 1, then the trivial test out passes...
; that is, if all of the points are outside of the clipping planes
; and none of the segments cross a clip plane (all withing a clip region), then
; the triangle is trivially out.
; Otherwise, more testing is needed.
tst r7, r11 ; first check k1 & k2
tstne r11, r12 ; if (k1&k2) was !0, check (k2&k3)
tstne r7, r12 ; if (k2&k3) was !0 , check (k1&k3)
bne ARM_ASSEMBLE_IDX_TRIFAN_NODRAW ; trivial rejection if !0
; so it wasn't entirely out, let's see if it's trivially in!
ARM_ASSEMBLE_IDX_TRIFAN_TRIVIAL_CLIP
; if any result is 1, then the trivial boundary span test fails!
orrs r10, r7, r11 ; if ORing is ever nonzero, then we aren't trivially in
orrs r10, r10, r12 ; if first OR was zero, must check second OR
; if no 1's, then skip clipping (trivially in) and go on to the cull test...
; but if any 1's, then clip first.
beq ARM_ASSEMBLE_IDX_TRIFAN_CULL
ARM_ASSEMBLE_IDX_TRIFAN_CALLCLIP
; Call HXFState* HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -