⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxfassemblearmprn725.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 4 页
字号:
;/* ************************************************************************ *\
;**    INTEL Corporation Proprietary Information
;**
;**    This listing is supplied under the terms of a license
;**    agreement with INTEL Corporation and may not be copied
;**    nor disclosed except in accordance with the terms of
;**    that agreement.
;**
;**    Copyright (c) 2003 Intel Corporation.
;**    All Rights Reserved.
;**
;** ************************************************************************ **
;	FILE: HXFAssembleARMPRN725.s
;	DESCRIPTION:  This File contains replication of the TriList/Strip processing
;		Functions defined in HXFAssembleARM.s. The functions in this file have been
;		modified to truncate Triangle Strips to 2 in length and Triangle lists to contain
;		only a single triangle. This is necessary to prevent exposing a HW bug
;		when Multitexturing and Fogging are enabled.
;
;	AUTHOR: Cian Montgomery
;	CREATED: October 19, 2003
;
;   $Date:          $ $Revision:   $
;   $Log:                                                                      $
;\* ************************************************************************ */

	INCLUDE HXFSTATE.INC

	IF :DEF:HXF_FIX_PRN725	
	
;** ************************************************************************ **
;**	CONSTANTS
;** ************************************************************************ **
; Flag bits kept in the upper bits of the flags|count register; the loop
; count shares the same register.
HXF_PA_IN_PRIMITIVE		EQU	0x40000000	; bit 30
HXF_PA_DRAWONE			EQU	0x20000000	; bit 29
HXF_PA_CULLDIR_CW		EQU	0x10000000	; bit 28: set for CW culling, clear for CCW
HXF_PA_CULL_ENABLE		EQU	0x08000000	; bit 27: set when culling is enabled

; Prefetch look-ahead, counted in loop iterations.
; One iteration is assumed to cost about 100 cycles.
HXF_PA_IDX_PREFETCH_DISTANCE	EQU	4	; iterations to fetch indices ahead
HXF_PA_VTX_PREFETCH_DISTANCE	EQU	2	; iterations to fetch vertices ahead

; Offsets from the current index pointer at which to prefetch indices.
; A list iteration consumes 3 indices, a strip iteration consumes 1.
; HXF_INDEX_SIZE is not defined in this file.
HXF_PA_IDX_PREFETCH_LIST	EQU	(3 * HXF_INDEX_SIZE * HXF_PA_IDX_PREFETCH_DISTANCE)
HXF_PA_IDX_PREFETCH_STRIP	EQU	(HXF_INDEX_SIZE * HXF_PA_IDX_PREFETCH_DISTANCE)

; Offsets from the current index pointer of the indices whose vertices
; are prefetched.
HXF_PA_VTX_PREFETCH_BASEIDX_LIST	EQU	(3 * HXF_INDEX_SIZE * HXF_PA_VTX_PREFETCH_DISTANCE)
HXF_PA_VTX_PREFETCH_BASEIDX_STRIP	EQU	(HXF_INDEX_SIZE * HXF_PA_VTX_PREFETCH_DISTANCE)

;** ************************************************************************ **
;**	EXPORTS
;** ************************************************************************ **
	EXPORT	|HXFAssembleIndexedTriListShort|
	EXPORT	|HXFAssembleIndexedTriStripShort|
	EXPORT	|HXFAssembleTriListShort|
	EXPORT	|HXFAssembleTriStripShort|

;** ************************************************************************ **
;**	IMPORTS
;** ************************************************************************ **
	IMPORT  |HXFClipTriangle|	
	IMPORT	|HXFDrawClippedTriangles|
	IMPORT  |ctlBeginPrimitiveList|
	IMPORT  |ctlEndPrimitiveList|
	IMPORT	|ctlVertex|

;** ************************************************************************ **
;**	VARIABLES
;** ************************************************************************ **

;** ************************************************************************ **
;**	FUNCTIONS
;** ************************************************************************ **
	AREA	HXFASSEMBLE, CODE, READONLY

;** ************************************************************************ **
; Name:				HXFAssembleIndexedTriListShort
; Description:		This draws an indexed TriList, submitting each drawn triangle as its own primitive list.
; Here's a layout of what we need to do:

; Init - parameters, variables, and conditional stuff
;	Based on the cullFace direction and clip enabled, we'll branch to the appropriate
;	parts of the iterative loop (CCW/CW, or no cull check, clip) to perform.  
;	Also, set up bDraw and HXF_PA_IN_PRIMITIVE bits in the count register

; Loop (CCW, CW, and no cull, and clipping)
;	Submit a triangle for the cull & clip tests (if enabled)
;		Note:  Complex clipped triangles will be handled directly by the Clip test
;	DRAW: If passes both test, flag bDraw = 1 and HXF_PA_IN_PRIMITIVE = 1 
;		But, if HXF_PA_IN_PRIMITIVE was 0, then call StartList
;	NO_DRAW: If bDraw is 0 (failed cull or clip), bDraw = HXF_PA_IN_PRIMITIVE = 0
;		if HXF_PA_IN_PRIMITIVE was 1, call EndList
;		if HXF_PA_IN_PRIMITIVE was already 0, check to see if ClippedTriangleBuffer needs
;			to be flushed.
;	reset for next loop; flip cullFace direction, move pointers back
; End
;	If HXF_PA_IN_PRIMITIVE, call Endlist.
;	flush ClippedTriangleBuffer if needed
; Input Arguments: 	pState in r0	
; Output Argument:	none
; Prototype in C:	void HXFAssembleIndexedTriListShort(HXFState* pState);
;** ************************************************************************ **
|HXFAssembleIndexedTriListShort| PROC
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 =            r13 = sp
	; r2 =              r6 = pVtxC      r10 =           r14 = 
	; r3 =              r7 =        	r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Basically, r1-r3, r7, r9, r10, and r12 are free for use.
	;	r11 is a special case, where we must do stuff with it

	stmfd sp!, {r4-r11, lr}	; stack push!

	ldr r12, [r0, #HXFSTATE_OFFSET_FLAGS]
	mov r1, #0		; use as loop counter up to NumPrimitives
					; also, implicitly set bits 31-27 to 0 (bDraw, HXF_PA_IN_PRIMITIVE, bClipEnable,
					; HXF_PA_CULLDIR_CW, HXF_PA_CULL_ENABLE

	ands r12, r12, #HXF_CULL_MASK		; mask out all but the Culling bits
	orrne r1, r1, #HXF_PA_CULL_ENABLE			; set to 1 if enabled
	cmp r12, #HXF_CULL_CW				; if HXF_CULL_CW, then start with CW iteration
	orreq r1, r1, #HXF_PA_CULLDIR_CW				; set to 1 for CW, 0 for CCW

	mov r8, r1	; send in the count & flags..
	ldr r9, [r0, #HXFSTATE_OFFSET_PINDICES]				; point to index list
		; which is always incremented sequentially

	str r8, [r0, #HXFSTATE_OFFSET_STORAGE_LR] ; Store Cull info for clipper

	; The only difference between CULL_CW and CULL_CCW is the order of
	; subtraction during vector calculation, so we went in and used
	; some local bits to do it, rather than maintain 3 different loops.

	; Furthermore, I will use a conditional to skip over culling entirely to keep
	; just a single loop.  If we need more performance due to branching penalties,
	; then we can unroll things.

	; In the C code, we store both the indices and the vertex pointer.
	; In ARM, we'll only store the vertex pointer; we always know that
	; we can find the index at count, count - 1, and count - 2 in the 
	; index list when we need them.

ARM_ASSEMBLE_IDX_TRILIST_LOOP
	; ---------------------------------------------------------------------- --
	; Register Map - Loop Setup
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Now, go snag 3 vertices in the list.
	ldr r7, [r0, #HXFSTATE_OFFSET_OUTVERTEXSIZE]		; stride
	ldr r10, [r0, #HXFSTATE_OFFSET_POUTVERTICES]		; Point to output list
	ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]

	; Prefetch 
	pld [r9, #HXF_PA_IDX_PREFETCH_LIST]  ; prefetch the indices ahead - 3 iterations ahead
	ldrh r1, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST]		; get point a's index - 2 ahead
	ldrh r2, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST+HXF_INDEX_SIZE]	; get point b's index - 2 ahead
	ldrh r3, [r9, #HXF_PA_VTX_PREFETCH_BASEIDX_LIST+(HXF_INDEX_SIZE<<1)] ; get point c's index - 2 ahead

	sub	r1,  r1,  r12	;	Factor in the Base Index
	mul r14, r1,  r7	;	multiply by stride
	add r1,  r10, r14	;	first vertex pointer
	sub	r2,  r2,  r12   ;	Factor in the Base Index
	mul r14, r2,  r7	;	multiply by stride
	add r2,  r10, r14	;	second vertex pointer
	sub	r3,  r3,  r12   ;	Factor in the Base Index
	mul r14, r3,  r7	;	multiply by stride
	add r3,  r10, r14	;	third vertex pointer

	pld [r1] ; prefetch vertex a +2
	pld [r2] ; prefetch vertex b +2
	pld [r3] ; prefetch vertex c +2

	; Load the Indices
	ldrh r4, [r9, #0]	;	first index
	ldrh r5, [r9, #2]	;   second index
	ldrh r6, [r9, #4]	;	third index

	sub	r4,  r4,  r12   ;	Factor in the Base Index
	mul r14, r4,  r7	;	multiply by stride
	add r4,  r10, r14	;	first vertex pointer
	sub	r5,  r5,  r12   ;	Factor in the Base Index
	mul r14, r5,  r7	;	multiply by stride
	add r5,  r10, r14	;	second vertex pointer
	sub	r6,  r6,  r12   ;	Factor in the Base Index
	mul r14, r6,  r7	;	multiply by stride
	add r6,  r10, r14	;	third vertex pointer
	
ARM_ASSEMBLE_IDX_TRILIST_CLIP 
	; ---------------------------------------------------------------------- --
	; Register Map - Clipping
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =  k1*         r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =  k2*         r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =  k3*         r7 =            r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Load Clip flags
	; otherwise enabled, so send clip flag information to clip tester
	ldr r12, [r0, #HXFSTATE_OFFSET_BASEINDEX]
	ldr r7,  [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS] ; pointer to clip flag array

	ldrh r1, [r9]						; get point a's index
	ldrh r2, [r9, #HXF_INDEX_SIZE]		; get point b's index
	ldrh r3, [r9, #(HXF_INDEX_SIZE<<1)]		; get point c's index

	; Account for base index
	sub r1, r1, r12
	sub r2, r2, r12
	sub r3, r3, r12

	add r1, r7, r1		; Byte pointer to get the correct clip flag info
	add r2, r7, r2		;	for k1, k2, k3
	add r3, r7, r3

	ldrb r7, [r1]	; k1
	ldrb r11, [r2]	; k2
	ldrb r12, [r3]	; k3
	
	; k1, k2, k3 are clip flags per vertex
	; if ((k1&k2) != 0 && (k2&k3) != 0 && (k3&k1) != 0) the triangle is treated as totally outside
	; if ((k1|k2|k3) == 0) no clipping is necessary (totally in)
	; else clip it.
	
	; if all results are a 1, then the trivial test out passes...
	; that is, if all of the points are outside of the clipping planes
	; and none of the segments cross a clip plane (all within a clip region), then
	; the triangle is trivially out.
	; Otherwise, more testing is needed.
	tst r7, r11		; first check k1 & k2
	tstne r11, r12		; if (k1&k2) was 1, check (k2&k3)
	tstne r7, r12		; if (k2&k3) was 1 , check (k1&k3)
	bne ARM_ASSEMBLE_IDX_TRILIST_NODRAW	; trivial rejection if 1

ARM_ASSEMBLE_IDX_TRILIST_TRIVIAL_CLIP 
	; if any result is 1, then the trivial boundary span test fails!
	orrs r10, r7, r11		; if ORing is ever nonzero, then we aren't trivially in
	orrs r10, r10, r12	; if first OR was zero, must check second OR

	; if no 1's, then skip clipping (trivially in) and go on to the cull test...
	; but if any 1's, then clip first.
	beq ARM_ASSEMBLE_IDX_TRILIST_CULL

ARM_ASSEMBLE_IDX_TRILIST_CALLCLIP 
	; void HXFClipTriangle(HXFState* pState, HUINT8* pV1, HUINT8* pV2, HUINT8* pV3)
	blne |HXFClipTriangle|
	b ARM_ASSEMBLE_IDX_TRILIST_NODRAW
	
ARM_ASSEMBLE_IDX_TRILIST_CULL 
	; ---------------------------------------------------------------------- --
	; Register Map - Cull
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =        	r11 =           r15 = pc
	; ---------------------------------------------------------------------- --
	; Test if we need to cull 
	tst r8, #HXF_PA_CULL_ENABLE	; cull check
	beq ARM_ASSEMBLE_IDX_TRILIST_DRAW  

	; CLIP SPACE vertex  pointers A, B, C in r4, r5, r6
	; result: signed value of dot product; if negative, visible, otherwise 
	;	not visible

	; The culling algorithm is basically a dot product of the camera to the 
	; surface normal. But, we have to generate the normal here from the 
	; triangle orientation. Since we are in clip space, the camera is 
	; [0,0,+/-1] depending on your LHR CCW or CW flag. This means we only 
	; really need the Z component of the face normal.
	; 1)  Create vectors
	ldr r7, [r4]		; ax
	ldr r2, [r5]		; bx
	ldr r1, [r4,#4]		; ay
	ldr r3, [r5,#4]		; by

	tst r8, #HXF_PA_CULLDIR_CW	; if 0, CCW; 1, CW
	
	; The only difference in computation is really which term (Ax or Ay) gets negated.
	; CW:	formula Nz.EyeZ = (AxBy - AyBx )  . [0,0,-1]
	; CW:					= (AyBx - AxBy)
	; CCW:	formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
	; CCW:					= (AxBy - AyBx)

	subne r11, r7, r2		; CW: -Ax = -(bx - ax ) = ax - bx	
	subeq r11, r2, r7		; CCW: Ax = bx - ax					; 

	ldr r2, [r6]			; cx 

	subne r12, r3, r1		; CW: Ay = by - ay
	subeq r12, r1, r3		; CCW: -Ay = -(by - ay) = ay -by		; 

	ldr r3, [r6,#4]			; cy
	; r7 = ax
	; r1 = ay
	; r2 = cx
	; r3 = cy
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay

	sub r7, r2, r7			; Bx = cx - ax 
	sub r1, r3, r1			; By = cy - ay

	; r7 = Bx
	; r1 = By
	; r11 = -/+Ax	(CW/CCW)
	; r12 = +/-Ay
	
	; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything 
	;	 else, as camera in projected space is at the origin)
	smull r3, r2, r11, r1		; AxBy in 64bits of r3r12
	smlals r3, r2, r12, r7		; add -AyBx in 64 bits to r3r12, need middle 32

	; r2 = 32.0 of Nz.EyeZ
	; r3 = 0.32 of Nz.EyeZ

	; Since we only care about the sign, just look at the hi word in r2
	; We expect the program to check for a negative!
	blt	ARM_ASSEMBLE_IDX_TRILIST_NODRAW

; Draw; gets here from cull disabled, cull pass and clip disabled, or cull/clip pass.
ARM_ASSEMBLE_IDX_TRILIST_DRAW 
	; ---------------------------------------------------------------------- --
	; Register Map
	; ---------------------------------------------------------------------- --
	; r0 =	pState		r4 = pVtxA		r8 = flags|cnt  r12 = 		
	; r1 =              r5 = pVtxB  	r9 = pIndices   r13 = sp
	; r2 =              r6 = pVtxC      r10 = pOutVtx   r14 = 
	; r3 =              r7 =			r11 =           r15 = pc
	; ---------------------------------------------------------------------- --

	;Call HXFState* HXFBeginPrimitiveList(HXFState*, void* pVtx0, void* pVtx1);
	mov r1, #0				; vtx0 * = NULL
	mov r2, #0				; vtx1 * = NULL
	mov r3, #0
	bl |ctlBeginPrimitiveList|

; Now, do compulsory DrawVertex
ARM_ASSEMBLE_IDX_TRILIST_STARTDRAW3 

	;Call HXFState* pState->pSPProc(HXFState* pState, void* vtx0, void* vtx1, void* vtx2);
	;ldr r12, [r0, #HXFSTATE_OFFSET_PSPPROC]
	mov r1, r4	; load vtx0
	mov r2, r5	; load vtx1
	mov r3, r6	; load vtx2
	;mov lr, pc
	;mov pc, r12
	bl |ctlVertex|

	ldr lr, = ARM_ASSEMBLE_IDX_TRILIST_LOOPBACK
	b |ctlEndPrimitiveList|

ARM_ASSEMBLE_IDX_TRILIST_NODRAW 
	; exit point for any failed bDraw checks

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -