📄 hxfassemblearm.s
字号:
; r3 = cy
; r11 = -/+Ax (CW/CCW)
; r12 = +/-Ay
sub r7, r2, r7 ; Bx = cx - ax
sub r1, r3, r1 ; By = cy - ay
; r7 = Bx
; r1 = By
; r11 = -/+Ax (CW/CCW)
; r12 = +/-Ay
; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything
; else, as camera in projected space is at the origin)
smull r3, r2, r11, r1 ; AxBy in 64bits of r3r12
smlals r3, r2, r12, r7 ; add -AyBx in 64 bits to r3r12, need middle 32
; r2 = 32.0 of Nz.EyeZ
; r3 = 0.32 of Nz.EyeZ
; Since we only care about the sign, just look at the hi word in r2 (64-bit result is r2:r3 = hi:lo)
; We expect the program to check for a negative!
blt ARM_ASSEMBLE_IDX_TRILIST_NODRAW
; Draw; gets here from cull disabled, cull pass and clip disabled, or cull/clip pass.
ARM_ASSEMBLE_IDX_TRILIST_DRAW
; ---------------------------------------------------------------------- --
; Register Map
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; bDraw implicitly 1
; check HXF_PA_IN_PRIMITIVE; if 0, call Startlist
; ALWAYS call DrawVertex
tst r8, #HXF_PA_IN_PRIMITIVE ; if 0, call startlist and set to true
bne ARM_ASSEMBLE_IDX_TRILIST_STARTDRAW3
orr r8, r8, #HXF_PA_IN_PRIMITIVE ; set to 1 in local flags
;Call HXFState* HXFBeginPrimitiveList(HXFState*, void* pVtx0, void* pVtx1);
mov r1, #0 ; vtx0 * = NULL
mov r2, #0 ; vtx1 * = NULL
mov r3, #0
bl |ctlBeginPrimitiveList|
; Now, do compulsory DrawVertex
ARM_ASSEMBLE_IDX_TRILIST_STARTDRAW3
;Call HXFState* pState->pSPProc(HXFState* pState, void* vtx0, void* vtx1, void* vtx2);
;ldr r12, [r0, #HXFSTATE_OFFSET_PSPPROC]
mov r1, r4 ; load vtx0
mov r2, r5 ; load vtx1
mov r3, r6 ; load vtx2
;mov lr, pc
;mov pc, r12
bl |ctlVertex|
b ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK ; Go and see if we're done!
ARM_ASSEMBLE_IDX_TRILIST_NODRAW
; exit point for any failed bDraw checks
; since bDraw is false, check to see if HXF_PA_IN_PRIMITIVE was true.
; if HXF_PA_IN_PRIMITIVE is true, call Endlist and clear the HXF_PA_IN_PRIMITIVE flag
; Now, check to see if the clipped triangle buffer is full.
ldr r11, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
ldr r12, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDPRIMITIVES]
rsbs r11, r11, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_VERTEX_BUFFER_PAD)
rsbgts r12, r12, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_PRIMITIVE_BUFFER_PAD)
bgt ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK
tst r8, #HXF_PA_IN_PRIMITIVE
bicne r8, r8, #HXF_PA_IN_PRIMITIVE ; clear HXF_PA_IN_PRIMITIVE is it was set
blne |ctlEndPrimitiveList|
bl |HXFDrawClippedTriangles| ; Flush clipped triangle list
ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK ; check to see if loop is complete!
; ---------------------------------------------------------------------- --
; Register Map
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
ldr r12, [r0, #HXFSTATE_OFFSET_NUMPRIMITIVES] ; count target
add r9, r9, #(HXF_INDEX_SIZE * 3) ; increment to first index of next triangle
add r8, r8, #1 ; increment count of primitives
bic r7, r8, #0xFF<<24 ; clear out high bits!
cmp r7, r12 ; check to see if at end of loop
blt ARM_ASSEMBLE_IDX_TRILIST_LOOP
ARM_ASSEMBLE_IDX_TRILIST_DONE
; Exit the whole shebang....
; Preload as much as possible
ldr r5, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
; first, if HXF_PA_IN_PRIMITIVE, then call EndList
tst r8, #HXF_PA_IN_PRIMITIVE
blne |ctlEndPrimitiveList|
; Next, if any remaining clipped vertices, draw them
cmp r5, #0
blgt |HXFDrawClippedTriangles| ; Flush the clip buffers
ldmfd sp!, {r4-r11, pc}
ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFAssembleIndexedTriStrip
; Description: This draws an indexed TriStrip.
; This version changes the indexing mode from Lists to Strips.
; The current index points to the first vertex of the current triangle.
; The count simply is the number of primitives drawn thus far.
;
; Note: "temp" storage has no guarantees of data persistency in the function call
; convention. "persistent" storage refers to callee saved registers, which can
; be used by us freely.
; Basically, r1-r3, r7, r10, and r12 are free for use.
; r11 is a special case, where we must do stuff with it
; Plan:
; Here's a layout of what we need to do:
; Init - parameters, variables, and conditional stuff
; Based on the cullFace direction and clip enabled, we'll branch to the appropriate
; parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.
; Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register
; Loop (CCW, CW, and no cull, and clipping)
; Submit a triangle for the cull & clip tests (if enabled)
; Note: Complex clipped triangles will be handled directly by the Clip test
; DRAW: If passes both test, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1
; But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
; NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
; if HXF_PA_IN_PRIMITIVE was 1, call EndList
; if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
; to be flushed.
; reset for next loop; flip cullFace direction, move pointers back
; End
; If HXF_PA_IN_PRIMITIVE, call Endlist.
; flush ClippedTriangleBuffer if needed
; Input Arguments: pState in r0
; Output Argument: none
; Prototype in C: void HXFAssembleIndexedTriStrip(HXFState* pState);
;** ************************************************************************ **
|HXFAssembleIndexedTriStrip| PROC
; ---------------------------------------------------------------------- --
; Register Map
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
stmfd sp!, {r4-r11,lr} ; stack push!
ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
mov r1, #0 ; use as loop counter up to NumPrimitives
; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE)
ands r12, r12, #HXF_CULL_MASK ; mask out all but the Culling bits
orrne r1, r1, #HXF_PA_CULL_ENABLE ; set to 1 if enabled
ands r12, r12, #HXF_CULL_CCW ; if HXF_CULL_CCW, then start with CCW iteration
orreq r1, r1, #HXF_PA_CULLDIR_CW ; set to 1 for CW, 0 for CCW
ldr r2, [r0, #HXFSTATE_OFFSET_PINDICES] ; point to index list
; which is always incremented sequentially
mov r8, r1 ; send in the count & flags..
ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE] ; stride
ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES] ; Point to output list
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX] ;
ldrh r4, [r2, #0] ; first index
ldrh r5, [r2, #2] ; second index
sub r4, r4, r12 ; Factor in Base index
sub r5, r5, r12
mul r12, r4, r7 ; multiply by stride
add r4, r10, r12 ; first vertex pointer
mul r12, r5, r7 ; multiply by stride
add r5, r10, r12 ; second vertex pointer
mov r9, r2
; The only difference between CULL_CW and CULL_CCW is the order of a couple
; of subtractions during the vector calculation, so we use some local flag
; bits to select it, rather than maintain 3 different loops.
; Furthermore, I will use a conditional to skip over culling entirely to keep
; just a single loop. If we need more performance due to branching penalties,
; then we can unroll things.
; In the C code, we store both the indices and the vertex pointer.
; In ARM, we'll only store the vertex pointer; we always know that
; we can find the index at count, count - 1, and count - 2 in the
; index list when we need them.
ARM_ASSEMBLE_IDX_TRISTRIP_LOOP
; ---------------------------------------------------------------------- --
; Register Map
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = stride r11 = r15 = pc
; ---------------------------------------------------------------------- --
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
; Prefetch
pld [r9, #HXF_PA_IDX_PREFETCH_STRIP] ; prefetch the indices ahead
ldrh r1, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_STRIP] ; get point a's index
sub r1, r1, r12 ; Account for Base Index
mul r14, r1, r7 ; multiply by stride
add r1, r10, r14 ; third vertex pointer
pld [r1] ; prefetch vertex a
; Load New Vertex
ldrh r6, [r9, #4] ; third index
sub r6, r6, r12 ; Account for Base Index
mul r14, r6, r7 ; multiply by stride
add r6, r10, r14 ; third vertex pointer
ARM_ASSEMBLE_IDX_TRISTRIP_CLIP
; ---------------------------------------------------------------------- --
; Register Map - Clipping
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = k1* r5 = pVtxB r9 = pIndices r13 = sp
; r2 = k2* r6 = pVtxC r10 = pOutVtx r14 =
; r3 = k3* r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; Load Clip flags
ldr r7, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS] ; pointer to clip flag array
ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
ldrh r1, [r9] ; get point a's index
ldrh r2, [r9, #HXF_INDEX_SIZE] ; get point b's index
ldrh r3, [r9, #(HXF_INDEX_SIZE<<1)] ; get point c's index
sub r1, r1, r12 ; Account for Base Index
sub r2, r2, r12
sub r3, r3, r12
add r1, r7, r1 ; Byte pointer to get the correct clip flag info
add r2, r7, r2 ; for k1, k2, k3
add r3, r7, r3
ldrb r7, [r1] ; k1
ldrb r11, [r2] ; k2
ldrb r12, [r3] ; k3
; logic:
; k1, k2, k3 are clip flags per vertex
; if ((k1&k2) && (k2&k3) && (k3&k1)) then totally outside (all segments totally outside)
; else if ((k1|k2|k3) == 0) then no clipping is necessary (totally in)
; else clip it.
; if all results are a 1, then the trivial test out passes...
; that is, if all of the points are outside of the clipping planes
; and none of the segments cross a clip plane (all within a clip region), then
; the triangle is trivially out.
; Otherwise, more testing is needed.
tst r7, r11 ; first check k1 & k2
tstne r11, r12 ; if (k1&k2) was !0, check (k2&k3)
tstne r7, r12 ; if (k2&k3) was !0 , check (k1&k3)
bne ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW ; trivial rejection if !0
ARM_ASSEMBLE_IDX_TRISTRIP_TRIVIAL_CLIP
; if any result is 1, then the trivial boundary span test fails!
orrs r10, r7, r11 ; if ORing is ever nonzero, then we aren't trivially in
orrs r10, r10, r12 ; if first OR was zero, must check second OR
; if no 1's, then skip the clipper (trivially in) and go on to the cull test...
; but if any 1's, then clip first.
beq ARM_ASSEMBLE_IDX_TRISTRIP_CULL
ARM_ASSEMBLE_IDX_TRISTRIP_CALLCLIP
;Call HXFState* HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
str r8, [r0, #HXFSTATE_OFFSET_STORAGE_LR] ; Store Cull info for clipper
blne |HXFClipTriangle|
b ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW
ARM_ASSEMBLE_IDX_TRISTRIP_CULL
; ---------------------------------------------------------------------- --
; Register Map - Cull
; ---------------------------------------------------------------------- --
; r0 = pState r4 = pVtxA r8 = flags|cnt r12 =
; r1 = r5 = pVtxB r9 = pIndices r13 = sp
; r2 = r6 = pVtxC r10 = pOutVtx r14 =
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; Test if we need to cull
tst r8, #HXF_PA_CULL_ENABLE ; cull check ; ... and submit it to the culling function (inlined) if needed
beq ARM_ASSEMBLE_IDX_TRISTRIP_DRAW
; CLIP SPACE vertex pointers A, B, C in r4, r5, r6
; result: signed value of dot product; if negative, visible, otherwise not visible
; The culling algorithm is basically a dot product of the camera to the surface normal.
; But, we have to generate the normal here from the triangle orientation.
; Since we are in clip space, the camera is [0,0,+/-1] depending on your LHR CCW or CW flag.
; This means we only really need the Z component of the face normal.
; 1) Create vectors
ldr r7, [r4] ; ax
ldr r2, [r5] ; bx
ldr r1, [r4,#4] ; ay
ldr r3, [r5,#4] ; by
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -