⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxfassemblearm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 5 页
字号:
	; r3 = cy
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay

	sub r7, r2, r7			; Bx = cx - ax 
	sub r1, r3, r1			; By = cy - ay

	; r7 = Bx
	; r1 = By
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay
	
	; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything 
	;	 else, as camera in projected space is at the origin)
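	smull r3, r2, r11, r1		; AxBy in 64 bits of r2:r3 (r3 = lo, r2 = hi)
	smlals r3, r2, r12, r7		; add -AyBx in 64 bits to r2:r3, need middle 32

	; r2 = 32.0 of Nz.EyeZ (hi word)
	; r3 = 0.32 of Nz.EyeZ (lo word)

	; Since we only care about the sign, just look at the hi word in r2
	; (smlals sets N from it).  We expect the program to check for a negative!
	; NOTE(review): blt tests N != V and smlals does not set V from the result,
	; so this relies on V being clear from earlier code; bmi would test N alone - confirm.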
	blt	ARM_ASSEMBLE_IDX_TRILIST_NODRAW

; Draw; gets here from cull disabled, cull pass and clip disabled, or cull/clip pass.
ARM_ASSEMBLE_IDX_TRILIST_DRAW 
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; bDraw implicitly 1
	; check HXF_PA_IN_PRIMITIVE; if 0, call Startlist
	;				ALWAYS call DrawVertex
	tst r8, #HXF_PA_IN_PRIMITIVE		; if 0, call startlist and set to true
	bne ARM_ASSEMBLE_IDX_TRILIST_STARTDRAW3
	orr r8, r8, #HXF_PA_IN_PRIMITIVE		; set to 1 in local flags

	;Call HXFState* HXFBeginPrimitiveList(HXFState*, void* pVtx0, void* pVtx1);
	mov r1, #0				; vtx0 * = NULL
	mov r2, #0				; vtx1 * = NULL
	mov r3, #0
	bl |ctlBeginPrimitiveList|

; Now, do the compulsory DrawVertex
ARM_ASSEMBLE_IDX_TRILIST_STARTDRAW3 

	;Call HXFState* pState->pSPProc(HXFState* pState, void* vtx0, void* vtx1, void* vtx2);
	;ldr r12, [r0, #HXFSTATE_OFFSET_PSPPROC]
	mov r1, r4	; load vtx0
	mov r2, r5	; load vtx1
	mov r3, r6	; load vtx2
	;mov lr, pc
	;mov pc, r12
	bl |ctlVertex|
	b ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK ; Go and see if we're done!

ARM_ASSEMBLE_IDX_TRILIST_NODRAW 
	; exit point for any failed bDraw checks

	; since bDraw is false, check to see if HXF_PA_IN_PRIMITIVE was true.
	;	if HXF_PA_IN_PRIMITIVE is true, call Endlist and clear the HXF_PA_IN_PRIMITIVE flag

	; Now, check to see if the clipped triangle buffer is full.
	ldr r11, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
	ldr r12, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDPRIMITIVES]

	rsbs r11, r11, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_VERTEX_BUFFER_PAD)
	rsbgts r12, r12, #(HXF_CLIP_VTX_SPACE - HXF_CLIP_PRIMITIVE_BUFFER_PAD)
	bgt ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK
	
	tst r8, #HXF_PA_IN_PRIMITIVE
	bicne r8, r8, #HXF_PA_IN_PRIMITIVE	; clear HXF_PA_IN_PRIMITIVE if it was set
	blne |ctlEndPrimitiveList|

	bl |HXFDrawClippedTriangles| ; Flush clipped triangle list

ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK ; check to see if loop is complete!
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	ldr r12, [r0, #HXFSTATE_OFFSET_NUMPRIMITIVES]	; count target

	add r9, r9, #(HXF_INDEX_SIZE * 3)	; increment to first index of next triangle
	add r8, r8, #1			; increment count of primitives
	bic r7, r8, #0xFF<<24	; clear out high bits!

	cmp r7, r12				; check to see if at end of loop
	blt ARM_ASSEMBLE_IDX_TRILIST_LOOP	

ARM_ASSEMBLE_IDX_TRILIST_DONE 
	; Exit the whole shebang....
	; Preload as much as possible
	ldr r5, [r0, #HXFSTATE_OFFSET_NUMCLIPPEDVERTICES]
	
	; first, if HXF_PA_IN_PRIMITIVE, then call EndList
	tst r8, #HXF_PA_IN_PRIMITIVE
	blne |ctlEndPrimitiveList|

	; Next, if any remaining clipped vertices, draw them
	cmp r5, #0
	blgt |HXFDrawClippedTriangles| ; Flush the clip buffers

	ldmfd sp!, {r4-r11, pc}
	ENDP
;** ************************************************************************ **
	

;** ************************************************************************ **
; Name:				HXFAssembleIndexedTriStrip
; Description:		This draws an indexed TriStrip.  	
;	This version changes the indexing mode from Lists to Strips.
;	The current index points to the first vertex of the current triangle.
;	The count simply is the number of primitives drawn thus far.
;
;	 Note:  "temp" storage has no guarantees of data persistency in the function call
;		 convention.  "persistent" storage refers to callee saved registers, which can
;		 be used by us freely.

;	Basically, r1-r3, r7, r10, and r12 are free for use.
;		r11 is a special case, where we must do stuff with it
; Plan:
;	Here's a layout of what we need to do:
; Init - parameters, variables, and conditional stuff
;	Based on the cullFace direction and clip enabled, we'll branch to the appropriate
;	parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.  
;	Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register

; Loop (CCW, CW, and no cull, and clipping)
;	Submit a triangle for the cull & clip tests (if enabled)
;		Note:  Complex clipped triangles will be handled directly by the Clip test
;	DRAW: If passes both test, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1 
;		But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
;	NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
;		if HXF_PA_IN_PRIMITIVE was 1, call EndList
;		if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
;			to be flushed.
;	reset for next loop; flip cullFace direction, move pointers back
; End
;	If HXF_PA_IN_PRIMITIVE, call Endlist.
;	flush ClippedTriangleBuffer if needed

; Input Arguments: 	pState in r0
; Output Argument:	none
; Prototype in C:	void HXFAssembleIndexedTriStrip(HXFState* pState);
;** ************************************************************************ **
|HXFAssembleIndexedTriStrip| PROC	
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	stmfd sp!, {r4-r11,lr}	; stack push!

	ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
	mov r1, #0		; use as loop counter up to NumPrimitives
					; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
					; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE

	ands r12, r12, #HXF_CULL_MASK		; mask out all but the Culling bits
	orrne r1, r1, #HXF_PA_CULL_ENABLE			; set to 1 if enabled
	ands r12, r12, #HXF_CULL_CCW				; if HXF_CULL_CCW, then start with CCW iteration
	orreq r1, r1, #HXF_PA_CULLDIR_CW				; set to 1 for CW, 0 for CCW

	ldr r2, [r0, #HXFSTATE_OFFSET_PINDICES]				; point to index list
		; which is always incremented sequentially

	mov r8, r1	; send in the count & flags..
	
	ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE]		; stride
	ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES]		; Point to output list
	ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]			;

	ldrh r4, [r2, #0]	;	first index
	ldrh r5, [r2, #2]	;   second index

	sub r4, r4, r12		; Factor in Base index
	sub r5, r5, r12

	mul r12, r4, r7		;	multiply by stride
	add r4, r10, r12	;	first vertex pointer
	mul r12, r5, r7		;	multiply by stride
	add r5, r10, r12	;	second vertex pointer
	mov r9, r2

	; The only difference between CULL_CW and CULL_CCW is the
	; order of subtraction during vector calculation, so we went in and used
	; some local bits to do it, rather than maintain 3 different loops.

	; Furthermore, I will use a conditional to skip over culling entirely to keep
	; just a single loop.  If we need more performance due to branching penalties,
	; then we can unroll things.

	; In the C code, we store both the indices and the vertex pointer.
	; In ARM, we'll only store the vertex pointer; we always know that
	; we can find the index at count, count - 1, and count - 2 in the 
	; index list when we need them.

ARM_ASSEMBLE_IDX_TRISTRIP_LOOP
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 = stride 	r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	ldr	 r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]

	; Prefetch
	pld [r9, #HXF_PA_IDX_PREFETCH_STRIP]  ; prefetch the indices ahead 
	ldrh r1, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_STRIP]		; get point a's index 

	sub r1, r1, r12		; Account for Base Index
	mul r14, r1, r7		;	multiply by stride
	add r1, r10, r14	;	pointer to the vertex to prefetch
	pld [r1] ; prefetch vertex a

	; Load New Vertex
	ldrh r6, [r9, #4]	;	third index

	sub r6, r6, r12		; Account for Base Index
	mul r14, r6, r7		;	multiply by stride
	add r6, r10, r14	;	third vertex pointer

ARM_ASSEMBLE_IDX_TRISTRIP_CLIP 
	; ---------------------------------------------------------------------- --
	; Register Map - Clipping
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =  k1*         r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =  k2*         r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =  k3*         r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Load Clip flags
	ldr  r7,  [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]		; pointer to clip flag array
	ldr	 r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]

	ldrh r1, [r9]		; get point a's index
	ldrh r2, [r9, #HXF_INDEX_SIZE]		; get point b's index
	ldrh r3, [r9, #(HXF_INDEX_SIZE<<1)]		; get point c's index

	sub r1, r1, r12 ; Account for Base Index
	sub r2, r2, r12
	sub r3, r3, r12

	add r1, r7, r1		; Byte pointer to get the correct clip flag info
	add r2, r7, r2		;	for k1, k2, k3
	add r3, r7, r3

	ldrb r7, [r1]	; k1
	ldrb r11, [r2]	; k2
	ldrb r12, [r3]	; k3

	; logic:
	; k1, k2, k3 are clip flags per vertex
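	; if ((k1&k2) && (k2&k3) && (k3&k1)) != 0, totally outside (all segments totally outside)
	;	NOTE(review): the pairwise tests can all be nonzero with no clip plane common
	;	to all three vertices (k1&k2&k3 == 0) - confirm this rejection is intended.
	; if (k1|k2|k3) == 0, no clipping necessary (totally in)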
	; else clip it.
		
	; if all results are a 1, then the trivial test out passes...
	; that is, if all of the points are outside of the clipping planes
	; and none of the segments cross a clip plane (all within a clip region), then
	; the triangle is trivially out.
	; Otherwise, more testing is needed.
	tst r7, r11		; first check k1 & k2
	tstne r11, r12		; if (k1&k2) was !0, check (k2&k3)
	tstne r7, r12		; if (k2&k3) was !0 , check (k1&k3)
	bne ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW	; trivial rejection if !0

ARM_ASSEMBLE_IDX_TRISTRIP_TRIVIAL_CLIP 
	; if any result is 1, then the trivial boundary span test fails!
	orrs r10, r7, r11		; if ORing is ever nonzero, then we aren't trivially in
	orrs r10, r10, r12	; if first OR was zero, must check second OR

	; if no 1's, then just draw (trivially in)...
	; but if any 1's, then clip first.
	beq ARM_ASSEMBLE_IDX_TRISTRIP_CULL

ARM_ASSEMBLE_IDX_TRISTRIP_CALLCLIP 
	;Call  HXFState* HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
	str r8, [r0, #HXFSTATE_OFFSET_STORAGE_LR] ; Store Cull info for clipper
	blne |HXFClipTriangle|
	b ARM_ASSEMBLE_IDX_TRISTRIP_NODRAW
	
ARM_ASSEMBLE_IDX_TRISTRIP_CULL 
	; ---------------------------------------------------------------------- --
	; Register Map - Cull
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Test if we need to cull 
	tst r8, #HXF_PA_CULL_ENABLE	; cull check  ; ... and submit it to the culling function (inlined) if needed
	beq ARM_ASSEMBLE_IDX_TRISTRIP_DRAW

	; CLIP SPACE vertex  pointers A, B, C in r4, r5, r6
	; result: signed value of dot product; if negative, visible, otherwise not visible

	; The culling algorithm is basically a dot product of the camera to the surface normal.
	; But, we have to generate the normal here from the triangle orientation.
	; Since we are in clip space, the camera is [0,0,+/-1] depending on your LHR CCW or CW flag.
	; This means we only really need the Z component of the face normal.

	; 1)  Create vectors
	ldr r7, [r4]		; ax
	ldr r2, [r5]		; bx
	ldr r1, [r4,#4]		; ay
	ldr r3, [r5,#4]		; by

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -