⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxfassemblearm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 5 页
字号:
	;Call  HXFState* HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
	blne |HXFClipTriangle|

	b ARM_ASSEMBLE_TRILIST_NODRAW
	
ARM_ASSEMBLE_TRILIST_CULL	; cull
	; ---------------------------------------------------------------------- --
	; Register Map - 
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =            r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --

	; Test if we need to cull 
	tst r8, #HXF_PA_CULL_ENABLE	; cull check
	beq ARM_ASSEMBLE_TRILIST_DRAW 	; ... and submit it to the culling function (inlined) if needed


	; CLIP SPACE vertex  pointers A, B, C in r4, r5, r6
	; result: signed value of dot product; if negative, visible, otherwise not visible

	; The culling algorithm is basically a dot product of the camera to the surface normal.
	; But, we have to generate the normal here from the triangle orientation.
	; Since we are in clip space, the camera is [0,0,+/-1] depending on your LHR CCW or CW flag.
	; This means we only really need the Z component of the face normal.

	; 1)  Create vectors
	ldr r7, [r4]		; ax
	ldr r2, [r5]		; bx
	ldr r1, [r4,#4]		; ay
	ldr r3, [r5,#4]		; by
	ldr r10, [r6, #4]		; cx

	tst r8, #HXF_PA_CULLDIR_CW	; if 0, CCW; 1, CW
	; The only difference in computation is really which term (Ax or Ay) gets negated.

	; CW:	formula Nz.EyeZ = (AxBy - AyBx )  . [0,0,-1]
	; CW:					= (AyBx - AxBy)
	; CCW:	formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
	; CCW:					= (AxBy - AyBx)

	subne r11, r7, r2		; CW: -Ax = -(bx - ax ) = ax - bx	
	subeq r11, r2, r7		; CCW: Ax = bx - ax					; 

	ldr r2, [r6]			; cx 

	subne r12, r3, r1		; CW: Ay = by - ay
	subeq r12, r1, r3		; CCW: -Ay = -(by - ay) = ay -by		; 

	; r7 = ax
	; r1 = ay
	; r2 = cx
	; r10 = cy
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay
	sub r1, r10, r1			; By = cy - ay
	sub r7, r2, r7			; Bx = cx - ax 

	; r7 = Bx
	; r1 = By
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay
	
	; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything else,
	;	 as camera in projected space is at the origin)
	smull r3, r2, r11, r1		; AxBy in 64bits of r3r12
	smlals r3, r2, r12, r7		; add -AyBx in 64 bits to r3r12, need middle 32

	; r2 = 32.0 of Nz.EyeZ
	; r3 = 0.32 of Nz.EyeZ
	; Since we only care about the sign, just look at the hi word in r2
	; We expect the program to check for a negative!
	blt	ARM_ASSEMBLE_TRILIST_NODRAW
	; passed cull test

	; Draw; gets here from cull disabled, cull pass and clip disabled, or cull/clip pass.

ARM_ASSEMBLE_TRILIST_DRAW 
	; ---------------------------------------------------------------------- --
	; Register Map - 
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =  clipFlags r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; bDraw implicitly 1
	; check HXF_PA_IN_PRIMITIVE; if 0, call Startlist
	;				ALWAYS call DrawVertex

	tst r8, #HXF_PA_IN_PRIMITIVE		; if 0, call startlist and set to true
	bne ARM_ASSEMBLE_TRILIST_STARTDRAW3
	orr r8, r8, #HXF_PA_IN_PRIMITIVE		; set to 1 in local flags

	;Call HXFState* HXFBeginPrimitiveList(HXFState*, void* pVtx0, void* pVtx1);
	mov r1, #0				; vtx0 * = NULL
	mov r2, #0				; vtx1 * = NULL
	mov r3, #0
	bl |ctlBeginPrimitiveList|

ARM_ASSEMBLE_TRILIST_STARTDRAW3 

	;Call HXFState* pState->pSPProc(HXFState*, void* vtx0, void* vtx1, void* vtx2);
	;ldr r12, [r0, #HXFSTATE_OFFSET_PSPPROC]
	mov r1, r4	; load vtx0
	mov r2, r5	; load vtx1
	mov r3, r6	; load vtx2
	;mov lr, pc
	;mov pc, r12
	bl |ctlVertex|
	b ARM_ASSEMBLE_TRILIST_LOOPBACK ; Go and see if we're done!

ARM_ASSEMBLE_TRILIST_NODRAW 
	; exit point for any failed bDraw checks

	; since bDraw is false, check to see if HXF_PA_IN_PRIMITIVE was true.
	;	if HXF_PA_IN_PRIMITIVE is true, call Endlist and clear the HXF_PA_IN_PRIMITIVE flag
	tst r8, #HXF_PA_IN_PRIMITIVE
	bicne r8, r8, #HXF_PA_IN_PRIMITIVE
	blne |ctlEndPrimitiveList|

	; Now, check to see if the clipped triangle buffer is full.
	; If it is (or close), flush it.
	ldr r12, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
	ldr r11, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDPRIMITIVES]

	rsbs r12, r12, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_VERTEX_BUFFER_PAD)
	rsbgts r11, r11, #(HXF_CLIP_PRIMITIVE_SPACE- HXF_CLIP_PRIMITIVE_BUFFER_PAD)
	blle |HXFDrawClippedTriangles| ; Flush the clip buffers

; check to see if loop is complete!
ARM_ASSEMBLE_TRILIST_LOOPBACK 
	; ---------------------------------------------------------------------- --
	; Register Map - 
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =  clipFlags r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --

	ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE]		; stride
	ldr r12, [r0, #HXFSTATE_OFFSET_NUMPRIMITIVES]	; count target

	add r9, r9, #3			; increment ClipFlags
	add r8, r8, #1			; increment count of primitives
	bic r1, r8, #0xFF<<24	; clear out high bits!

	; Load Vertices for the next iteration
	add r4, r6, r7
	add r5, r4, r7
	add r6, r5, r7

	cmp r1, r12				; check to see if at end of loop
	blt ARM_ASSEMBLE_TRILIST_LOOP	; no?

ARM_ASSEMBLE_TRILIST_DONE 

	; Exit the whole shebang....
	; Preload as much as possible
	ldr r5, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
	
	; first, if HXF_PA_IN_PRIMITIVE, then call EndList
	tst r8, #HXF_PA_IN_PRIMITIVE
	blne |ctlEndPrimitiveList|

	; Next, if any remaining clipped vertices, draw them
	cmp r5, #0
	blgt |HXFDrawClippedTriangles| ; Flush the clip buffers

	ldmfd sp!, {r4-r11, pc}
	ENDP
;** ************************************************************************ **
	
;** ************************************************************************ **
; Name:				HXFAssembleTriStrip
; Description:		This draws a TriStrip.  	
; This version changes the indexing mode from Lists to Strips.
; The current index points to the first vertex of the current triangle.
; The count simply is the number of primitives drawn thus far.
; Plan:

; Here's a layout of what we need to do:

; Init - parameters, variables, and conditional stuff
;	Based on the cullFace direction and clip enabled, we'll branch to the appropriate
;	parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.  
;	Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register

; Loop (CCW, CW, and no cull, and clipping)
;	Submit a triangle for the cull & clip tests (if enabled)
;		Note:  Complex clipped triangles will be handled directly by the Clip test
;	DRAW: If passes both test, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1 
;		But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
;	NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
;		if HXF_PA_IN_PRIMITIVE was 1, call EndList
;		if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
;			to be flushed.
;	reset for next loop; flip cullFace direction, move pointers back
; End
;	If HXF_PA_IN_PRIMITIVE, call Endlist.
;	flush ClippedTriangleBuffer if needed
; Input Arguments: 	pState in r0
; Output Argument:	none
; Prototype in C:	void HXFAssembleTriStrip(HXFState* pState);
;** ************************************************************************ **	
|HXFAssembleTriStrip| PROC
	; ---------------------------------------------------------------------- --
	; Register Map - 
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =  clipFlags r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Note:  "temp" storage has no guarantees of data persistency in the function call
	;		 convention.  "persistent" storage refers to callee saved registers, which can
	;		 be used by us freely.

	; Basically, r1-r3, r7, r10, and r12 are free for use.
	;	r11 is a special case, where we must do stuff with it

	stmfd sp!, {r4-r11,lr}	; stack push!

	ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
	mov r1, #0		; use as loop counter up to NumPrimitives
					; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
					; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE

	ands r12, r12, #HXF_CULL_MASK		; mask out all but the Culling bits
	orrne r1, r1, #HXF_PA_CULL_ENABLE			; set to 1 if enabled
	ands r12, r12,  #HXF_CULL_CCW				; if HXF_CULL_CCW, then start with CCW iteration
	orreq r1, r1, #HXF_PA_CULLDIR_CW				; set to 1 for CW, 0 for CCW

	ldr r4, [r0, #HXFSTATE_OFFSET_POUTVERTICES]		; Point to output list
	ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE]		; stride
	ldr r9, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]		; pointer to clip flag array

	mov r8, r1	; send in the count & flags..

	; Setup Initial vertices
	add r5, r4, r7
	add r6, r5, r7

	; The only difference between CULL_CW and CULL_CCW is the order of
	; subtraction during vector calculation, so we use a couple of local
	; flag bits to select it, rather than maintain 3 different loops.

	; Furthermore, I will use a conditional to skip over culling entirely to keep
	; just a single loop.  If we need more performance due to branching penalties,
	; then we can unroll things.

	; In the C code, we store both the indices and the vertex pointer.
	; In ARM, we'll only store the vertex pointer; we always know that
	; we can find the index at count, count - 1, and count - 2 in the 
	; index list when we need them.

ARM_ASSEMBLE_TRISTRIP_LOOP
	; ---------------------------------------------------------------------- --
	; Register Map - 
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =  clipFlags r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --

	; Prefetch
	mov r1, #HXF_PA_VTX_PREFETCH_DISTANCE
	mul r12, r7, r1
	add r1, r6, r1
	pld [r1] ; prefetch vertex a 

ARM_ASSEMBLE_TRISTRIP_CLIP
	; ---------------------------------------------------------------------- --
	; Register Map - Clipping
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 = k1*          r5 = pVtxB  	r9 =  clipFlags r13 = sp
	; r2 = k2*          r6 = pVtxC      r10 =           r14 = 
	; r3 = k3*          r7 = stride		r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Load Clip flags
	add r1, r9, #0		; Byte pointer to get the correct clip flag info
	add r2, r1, #1		;	for k1, k2, k3
	add r3, r2, #1

	ldrb r7, [r1]	; k1
	ldrb r11, [r2]	; k2
	ldrb r12, [r3]	; k3
 	
	; logic:
	; k1, k2, k3 are clip flags per vertex
	; if ((k1&k2) != 0 && (k2&k3) != 0 && (k3&k1) != 0) the triangle is totally outside (all segments totally outside)
	; if ((k1|k2|k3) == 0) no clipping is necessary (totally in)
	; else clip it.
		
	; if all results are a 1, then the trivial test out passes...
	; that is, if all of the points are outside of the clipping planes
	; and none of the segments cross a clip plane (all within a clip region), then
	; the triangle is trivially out.
	; Otherwise, more testing is needed.
	tst r7, r11		; first check k1 & k2
	tstne r11, r12		; if (k1&k2) was !0, check (k2&k3)
	tstne r7, r12		; if (k2&k3) was !0, check (k1&k3)
	bne ARM_ASSEMBLE_TRISTRIP_NODRAW	; trivial rejection if !0

	; so it wasn't entirely out, let's see if it's trivially in!
ARM_ASSEMBLE_TRISTRIP_TRIVIAL_CLIP 
	; if any result is 1, then the trivial boundary span test fails!
	orrs r10, r7, r11		; if ORing is ever nonzero, then we aren't trivially in
	orrs r10, r10, r12	; if first OR was zero, must check second OR

	; if no 1's, then skip clipping (trivially in) and go on to the cull test...
	; but if any 1's, then clip first.
	beq ARM_ASSEMBLE_TRISTRIP_CULL

ARM_ASSEMBLE_TRISTRIP_CALLCLIP 
	;Call  HXFState* HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, H

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -