⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxfassemblearm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 5 页
字号:
;/* ************************************************************************ *\
;**    INTEL Corporation Proprietary Information
;**
;**    This listing is supplied under the terms of a license
;**    agreement with INTEL Corporation and may not be copied
;**    nor disclosed except in accordance with the terms of
;**    that agreement.
;**
;**    Copyright (c) 2003 Intel Corporation.
;**    All Rights Reserved.
;**
;** ************************************************************************ **
;	FILE: HXFAssembleARM.s
;	DESCRIPTION: 
;	
;	AUTHOR: Cian Montgomery
;	CREATED: July 31, 2003
;
;   $Date: 3/19/04 6:07p $ $Revision: 20 $
;   $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFAssembleARM.s $
; * 
; * 20    3/19/04 6:07p Clmontgo
; * Fix conformance issues
; * 
; * 19    3/19/04 2:11p Clmontgo
; * Moved  VP xform to after clip and 1/w
; * 
; * 18    3/18/04 10:37p Clmontgo
; * Fixes for Clipping and SP clean up
; * 
; * 17    2/25/04 11:39a Clmontgo
; * Fix a number of comparisons that were doing sign-dependent gt tests
; * instead of equality-based ne tests.
; * 
; * 16    2/04/04 5:42p Clmontgo
; * Rewrite of Slaveport code to do incremental writes, should eliminate
; * Slaveport write buffer stalls. Added some error handling to Draw*
; * functions. 
; * 
; * 15    2/02/04 2:23p Clmontgo
; * Fixed Clipping Trivially in Test
; * 
; * 14    1/21/04 2:09p Cmdoan
; * fixed hw level strip cull direction
; 
; 13    12/17/03 9:20a Clmontgo
; Added Version ID and log to file headers
;\* ************************************************************************ */
	INCLUDE HXFSTATE.INC

;** ************************************************************************ **
;**	CONSTANTS
;** ************************************************************************ **
; Flag bits carried in the upper bits of the flags|count register used by the
; assemble loops (the low bits hold the primitive counter).
HXF_PA_IN_PRIMITIVE		EQU	(1 << 30)	; bit 30: in-primitive flag

HXF_PA_CULLDIR_CW		EQU	(1 << 28)	; bit 28: cull direction, set for CW, clear for CCW
HXF_PA_CULL_ENABLE		EQU (1 << 27)	; bit 27: set when culling is enabled

; Prefetch distances, expressed in loop iterations.
; Assume one iteration is about 100 cycles.
HXF_PA_IDX_PREFETCH_DISTANCE EQU (4)  ; Number of iterations to fetch indices ahead
HXF_PA_VTX_PREFETCH_DISTANCE EQU (2)  ; Number of iterations to fetch vertices ahead

; Byte offsets from the current index pointer used when prefetching index data.
; The list variant scales by 3 indices per iteration, the strip variant by 1.
; HXF_INDEX_SIZE is not defined in this file (presumably HXFSTATE.INC - confirm).
HXF_PA_IDX_PREFETCH_LIST EQU (HXF_PA_IDX_PREFETCH_DISTANCE * 3 * HXF_INDEX_SIZE)
HXF_PA_IDX_PREFETCH_STRIP EQU (HXF_PA_IDX_PREFETCH_DISTANCE * 1 * HXF_INDEX_SIZE)

; Byte offsets from the current index pointer at which to read the indices of
; the vertices that are to be prefetched (same 3-per-list / 1-per-strip scaling).
HXF_PA_VTX_PREFETCH_BASEIDX_LIST  EQU (HXF_PA_VTX_PREFETCH_DISTANCE * 3 * HXF_INDEX_SIZE)
HXF_PA_VTX_PREFETCH_BASEIDX_STRIP EQU (HXF_PA_VTX_PREFETCH_DISTANCE * 1 * HXF_INDEX_SIZE)

;** ************************************************************************ **
;**	EXPORTS
;** ************************************************************************ **
	EXPORT	|HXFAssembleIndexedTriList|
	EXPORT	|HXFAssembleIndexedTriStrip|
	EXPORT	|HXFAssembleIndexedTriFan|
	EXPORT	|HXFAssembleTriList|
	EXPORT	|HXFAssembleTriStrip|
	EXPORT	|HXFAssembleTriFan|

;** ************************************************************************ **
;**	IMPORTS
;** ************************************************************************ **
	IMPORT  |HXFClipTriangle|	
	IMPORT	|HXFDrawClippedTriangles|
	IMPORT  |ctlBeginPrimitiveList|
	IMPORT  |ctlEndPrimitiveList|
	IMPORT  |ctlVertex|

;** ************************************************************************ **
;**	VARIABLES
;** ************************************************************************ **

;** ************************************************************************ **
;**	FUNCTIONS
;** ************************************************************************ **
	AREA	HXFASSEMBLE, CODE, READONLY

;** ************************************************************************ **
; Name:				HXFAssembleIndexedTriList
; Description:		This draws an indexed TriList.  	
; Here's a layout of what we need to do:

; Init - parameters, variables, and conditional stuff
;	Based on the cullFace direction and clip enabled, we'll branch to the appropriate
;	parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.  
;	Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register

; Loop (CCW, CW, and no cull, and clipping)
;	Submit a triangle for the cull & clip tests (if enabled)
;		Note:  Complex clipped triangles will be handled directly by the Clip test
;	DRAW: If it passes both tests, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1 
;		But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
;	NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
;		if HXF_PA_IN_PRIMITIVE was 1, call EndList
;		if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
;			to be flushed.
;	reset for next loop; flip cullFace direction, move pointers back
; End
;	If HXF_PA_IN_PRIMITIVE, call Endlist.
;	flush ClippedTriangleBuffer if needed
; Input Arguments: 	pState in r0	
; Output Argument:	none
; Prototype in C:	void HXFAssembleIndexedTriList(HXFState* pState);
;** ************************************************************************ **
|HXFAssembleIndexedTriList| PROC
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =            r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 =        	r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Basically, r1-r3, r7, r9, r10, and r12 are free for use.
	;	r11 is a special case, where we must do stuff with it

	stmfd sp!, {r4-r11, lr}	; stack push!

	ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
	mov r1, #0		; use as loop counter up to NumPrimitives
					; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
					; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE

	ands r12, r12, #HXF_CULL_MASK		; mask out all but the Culling bits
	orrne r1, r1, #HXF_PA_CULL_ENABLE			; set to 1 if enabled
	cmp r12, #HXF_CULL_CW				; if HXF_CULL_CW, then start with CW iteration
	orreq r1, r1, #HXF_PA_CULLDIR_CW				; set to 1 for CW, 0 for CCW

	mov r8, r1	; send in the count & flags..
	ldr r9, [r0, #HXFSTATE_OFFSET_PINDICES]				; point to index list
		; which is always incremented sequentially

	str r8, [r0, #HXFSTATE_OFFSET_STORAGE_LR] ; Store Cull info for clipper

	; The only difference between CULL_CW and CULL_CCW is the order of
	; subtraction during vector calculation, so we use some local flag bits
	; to select it, rather than maintain 3 different loops.

	; Furthermore, I will use a conditional to skip over culling entirely to keep
	; just a single loop.  If we need more performance due to branching penalties,
	; then we can unroll things.

	; In the C code, we store both the indices and the vertex pointer.
	; In ARM, we'll only store the vertex pointer; we always know that
	; we can find the index at count, count - 1, and count - 2 in the 
	; index list when we need them.

ARM_ASSEMBLE_IDX_TRILIST_LOOP
	; ---------------------------------------------------------------------- --
	; Register Map - Loop Setup
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Now, go snag 3 vertices in the list.
	ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE]		; stride
	ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES]		; Point to output list
	ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]

	; Prefetch 
	pld [r9, #HXF_PA_IDX_PREFETCH_LIST]  ; prefetch the indices ahead - 3 iterations ahead
	ldrh r1, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST]		; get point a's index - 2 ahead
	ldrh r2, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST+HXF_INDEX_SIZE]	; get point b's index - 2 ahead
	ldrh r3, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST+(HXF_INDEX_SIZE<<1)] ; get point c's index - 2 ahead

	sub	r1,  r1,  r12	;	Factor in the Base Index
	mul r14, r1,  r7	;	multiply by stride
	add r1,  r10, r14	;	first vertex pointer
	sub	r2,  r2,  r12   ;	Factor in the Base Index
	mul r14, r2,  r7	;	multiply by stride
	add r2,  r10, r14	;	second vertex pointer
	sub	r3,  r3,  r12   ;	Factor in the Base Index
	mul r14, r3,  r7	;	multiply by stride
	add r3,  r10, r14	;	third vertex pointer

	pld [r1] ; prefetch vertex a +2
	pld [r2] ; prefetch vertex b +2
	pld [r3] ; prefetch vertex c +2

	; Load the Indices
	ldrh r4, [r9, #0]	;	first index
	ldrh r5, [r9, #2]	;   second index
	ldrh r6, [r9, #4]	;	third index

	sub	r4,  r4,  r12   ;	Factor in the Base Index
	mul r14, r4,  r7	;	multiply by stride
	add r4,  r10, r14	;	first vertex pointer
	sub	r5,  r5,  r12   ;	Factor in the Base Index
	mul r14, r5,  r7	;	multiply by stride
	add r5,  r10, r14	;	second vertex pointer
	sub	r6,  r6,  r12   ;	Factor in the Base Index
	mul r14, r6,  r7	;	multiply by stride
	add r6,  r10, r14	;	third vertex pointer
	
ARM_ASSEMBLE_IDX_TRILIST_CLIP 
	; ---------------------------------------------------------------------- --
	; Register Map - Clipping
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =  k1*         r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =  k2*         r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =  k3*         r7 =            r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Load Clip flags
	; otherwise enabled, so send clip flag information to clip tester
	ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
	ldr r7,  [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS] ; pointer to clip flag array

	ldrh r1, [r9]						; get point a's index
	ldrh r2, [r9, #HXF_INDEX_SIZE]		; get point b's index
	ldrh r3, [r9, #(HXF_INDEX_SIZE<<1)]		; get point c's index

	; Account for base index
	sub r1, r1, r12
	sub r2, r2, r12
	sub r3, r3, r12

	add r1, r7, r1		; Byte pointer to get the correct clip flag info
	add r2, r7, r2		;	for k1, k2, k3
	add r3, r7, r3

	ldrb r7, [r1]	; k1
	ldrb r11, [r2]	; k2
	ldrb r12, [r3]	; k3
	
	; k1, k2, k3 are clip flags per vertex
	; if ((k1&k2) != 0 && (k2&k3) != 0 && (k3&k1) != 0) means totally outside (all segments totally outside)
	; if ((k1|k2|k3) == 0) means no clipping necessary (totally in)
	; else clip it.
	
	; if all results are a 1, then the trivial test out passes...
	; that is, if all of the points are outside of the clipping planes
	; and none of the segments cross a clip plane (all within a clip region), then
	; the triangle is trivially out.
	; Otherwise, more testing is needed.
	tst r7, r11		; first check k1 & k2
	tstne r11, r12		; if (k1&k2) was 1, check (k2&k3)
	tstne r7, r12		; if (k2&k3) was 1 , check (k1&k3)
	bne ARM_ASSEMBLE_IDX_TRILIST_NODRAW	; trivial rejection if 1

ARM_ASSEMBLE_IDX_TRILIST_TRIVIAL_CLIP 
	; if any result is 1, then the trivial boundary span test fails!
	orrs r10, r7, r11		; if ORing is ever nonzero, then we aren't trivially in
	orrs r10, r10, r12	; if first OR was zero, must check second OR

	; if no 1's, then just draw (trivially in)...
	; but if any 1's, then clip first.
	beq ARM_ASSEMBLE_IDX_TRILIST_CULL

ARM_ASSEMBLE_IDX_TRILIST_CALLCLIP 
	; void HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
	blne |HXFClipTriangle|
	b ARM_ASSEMBLE_IDX_TRILIST_NODRAW
	
ARM_ASSEMBLE_IDX_TRILIST_CULL 
	; ---------------------------------------------------------------------- --
	; Register Map - Cull
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =        	r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Test if we need to cull 
	tst r8, #HXF_PA_CULL_ENABLE	; cull check
	beq ARM_ASSEMBLE_IDX_TRILIST_DRAW  

	; CLIP SPACE vertex  pointers A, B, C in r4, r5, r6
	; result: signed value of dot product; if negative, visible, otherwise 
	;	not visible

	; The culling algorithm is basically a dot product of the camera to the 
	; surface normal. But, we have to generate the normal here from the 
	; triangle orientation. Since we are in clip space, the camera is 
	; [0,0,+/-1] depending on your LHR CCW or CW flag. This means we only 
	; really need the Z component of the face normal.
	; 1)  Create vectors
	ldr r7, [r4]		; ax
	ldr r2, [r5]		; bx
	ldr r1, [r4,#4]		; ay
	ldr r3, [r5,#4]		; by

	tst r8, #HXF_PA_CULLDIR_CW	; if 0, CCW; 1, CW
	
	; The only difference in computation is really which term (Ax or Ay) gets negated.
	; CW:	formula Nz.EyeZ = (AxBy - AyBx )  . [0,0,-1]
	; CW:					= (AyBx - AxBy)
	; CCW:	formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
	; CCW:					= (AxBy - AyBx)

	subne r11, r7, r2		; CW: -Ax = -(bx - ax ) = ax - bx	
	subeq r11, r2, r7		; CCW: Ax = bx - ax					; 

	ldr r2, [r6]			; cx 

	subne r12, r3, r1		; CW: Ay = by - ay
	subeq r12, r1, r3		; CCW: -Ay = -(by - ay) = ay -by		; 

	ldr r3, [r6,#4]			; cy
	; r7 = ax
	; r1 = ay
	; r2 = cx

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -