⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxfutilsarm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
字号:
;/* ************************************************************************ *\
;**    INTEL Corporation Proprietary Information
;**
;**    This listing is supplied under the terms of a license
;**    agreement with INTEL Corporation and may not be copied
;**    nor disclosed except in accordance with the terms of
;**    that agreement.
;**
;**    Copyright (c) 2003 Intel Corporation.
;**    All Rights Reserved.
;**
;** ************************************************************************ **
;**	FILE: HXFUtilsARM.s
;**	DESCRIPTION: Cache preload helpers, triangle cull test and viewport transform routines. 
;**	
;**	AUTHOR: Cian Montgomery
;**	CREATED: July 31, 2003
;**
; * $Date: 5/28/04 10:21a $ $Revision: 15 $
; * $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFUtilsARM.s $
; * 
; * 15    5/28/04 10:21a Clmontgo
; * Extened 1/w presicion in Clipper
; * 
; * 14    5/27/04 1:54p Clmontgo
; * Fixed point enhancements. 
; * 
; * 13    3/25/04 1:27p Clmontgo
; * Optimization of Clip flag generation and VP XForm. Fix for Clipping
; * issue observed in previous version. 
; * Revision 1.7  2004/03/22 11:43:42  bcb
; * New Intel code drop. 22/03/04
; * 
; * 12    3/19/04 2:11p Clmontgo
; * Moved  VP xform to after clip and 1/w
; * 
; * 11    3/18/04 10:37p Clmontgo
; * Fixes for Clipping and SP clean up
; * 
; * 10    3/14/04 7:53p Clmontgo
; * Fixes for Clipping, Tex Coords, Slaveport optimizations, Clip Flag
; * Generation , and Float To Fixed Conversions.
; * 
; * 9     2/03/04 8:37p Clmontgo
; * Slaveport Rewrite and partial Fix(HACK)
; * 
; * 8     1/30/04 8:53a Clmontgo
; 
; 7     12/21/03 12:59p Clmontgo
; 
; 6     12/17/03 9:22a Clmontgo
; Added Version ID and log to file headers
;\* ************************************************************************ */
	INCLUDE HXFState.inc ; Definitions of the HXFState Structure

;** ************************************************************************ **
;**	CONSTANTS
;** ************************************************************************ **

;** ************************************************************************ **
;**	EXPORTS
;** ************************************************************************ **
	; Preload helpers: one PLD per pointer argument
	EXPORT	|HXFPreload1|
	EXPORT	|HXFPreload2|
	EXPORT	|HXFPreload3|
	EXPORT	|HXFPreload4|
	; Preload helpers: two PLDs per pointer argument (base and base + 16)
	EXPORT	|HXFPreload1_2L|
	EXPORT	|HXFPreload2_2L|
	EXPORT	|HXFPreload3_2L|
	EXPORT	|HXFPreload4_2L|

	; Geometry helpers
	EXPORT	|HXFViewportTransform|
	EXPORT	|HXFCullTest|


;** ************************************************************************ **
;**	VARIABLES
;** ************************************************************************ **

;** ************************************************************************ **
;**	MACROS
;** ************************************************************************ **

;** ************************************************************************ **
;**	FUNCTIONS
;** ************************************************************************ **
	AREA	.text, CODE, READONLY

;** ************************************************************************ **
; Name:					HXFPreload1
; Description:			Issues a PLD (cache preload hint) for the address passed
;						in r0, then returns. No register is written.
; Input Arguments: 		r0 - address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload1(void*);
;** ************************************************************************ **
|HXFPreload1| PROC
	pld [r0]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload2
; Description:			Issues one PLD (cache preload hint) for each of the two
;						addresses passed in r0 and r1, then returns. No register
;						is written.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload2(void*, void*);
;** ************************************************************************ **
|HXFPreload2| PROC
	pld [r0]
	pld [r1]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload3
; Description:			Issues one PLD (cache preload hint) for each of the three
;						addresses passed in r0-r2, then returns. No register is
;						written.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
;						r2 - third address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload3(void*, void*, void*);
;** ************************************************************************ **
|HXFPreload3| PROC
	pld [r0]
	pld [r1]
	pld [r2]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload4
; Description:			Issues one PLD (cache preload hint) for each of the four
;						addresses passed in r0-r3, then returns. No register is
;						written.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
;						r2 - third address to preload
;						r3 - fourth address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload4(void*, void*, void*, void*);
;** ************************************************************************ **
|HXFPreload4| PROC
	pld [r0]
	pld [r1]
	pld [r2]
	pld [r3]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **


;** ************************************************************************ **
; Name:					HXFPreload1_2L
; Description:			Issues two PLD (cache preload) hints for the pointer in
;						r0: one at [r0] and one at [r0 + 16], then returns.
;						No register is written.
;						NOTE(review): "_2L" presumably means "two cache lines".
;						XScale data-cache lines are 32 bytes, so the +16 offset
;						only reaches a second line when r0 lies in the upper half
;						of a line - confirm the intended offset.
; Input Arguments: 		r0 - address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload1_2L(void*);
;** ************************************************************************ **
|HXFPreload1_2L| PROC
	pld [r0]
	pld [r0, #16]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload2_2L
; Description:			For each of the two pointers in r0 and r1, issues PLD
;						(cache preload) hints at [ptr] and [ptr + 16], then
;						returns. No register is written.
;						NOTE(review): "_2L" presumably means "two cache lines".
;						XScale data-cache lines are 32 bytes, so the +16 offset
;						only reaches a second line when the pointer lies in the
;						upper half of a line - confirm the intended offset.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload2_2L(void*, void*);
;** ************************************************************************ **
|HXFPreload2_2L| PROC
	pld [r0]
	pld [r0, #16]
	pld [r1]
	pld [r1, #16]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload3_2L
; Description:			For each of the three pointers in r0-r2, issues PLD
;						(cache preload) hints at [ptr] and [ptr + 16], then
;						returns. No register is written.
;						NOTE(review): "_2L" presumably means "two cache lines".
;						XScale data-cache lines are 32 bytes, so the +16 offset
;						only reaches a second line when the pointer lies in the
;						upper half of a line - confirm the intended offset.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
;						r2 - third address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload3_2L(void*, void*, void*);
;** ************************************************************************ **
|HXFPreload3_2L| PROC
	pld [r0]
	pld [r0, #16]
	pld [r1]
	pld [r1, #16]
	pld [r2]
	pld [r2, #16]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFPreload4_2L
; Description:			For each of the four pointers in r0-r3, issues PLD
;						(cache preload) hints at [ptr] and [ptr + 16], then
;						returns. No register is written.
;						NOTE(review): "_2L" presumably means "two cache lines".
;						XScale data-cache lines are 32 bytes, so the +16 offset
;						only reaches a second line when the pointer lies in the
;						upper half of a line - confirm the intended offset.
; Input Arguments: 		r0 - first address to preload
;						r1 - second address to preload
;						r2 - third address to preload
;						r3 - fourth address to preload
; Output Argument:		None
; Prototype in C:		void HXFPreload4_2L(void*, void*, void*, void*);
;** ************************************************************************ **
|HXFPreload4_2L| PROC
	pld [r0]
	pld [r0, #16]
	pld [r1]
	pld [r1, #16]
	pld [r2]
	pld [r2, #16]
	pld [r3]
	pld [r3, #16]
	bx lr				; interworking-safe return: MOV pc, lr cannot switch to Thumb state on ARMv5TE
	ENDP
;** ************************************************************************ **

;** ************************************************************************ **
; Name:					HXFCullTest
; Description:			Computes the signed Z component of the triangle's face
;						normal from the x/y of three vertices and returns the
;						high 32 bits of the 64-bit result. Each vertex is read
;						as x at offset 0 and y at offset 4.
;						With A = b - a and B = c - a:
;						  r3 == 0 : result = Ay*Bx - Ax*By
;						  r3 != 0 : result = Ax*By - Ay*Bx
; Input Arguments: 		r0 - pVtxA
;						r1 - pVtxB
;						r2 - pVtxC
;						r3 - bCullCW
;						NOTE(review): the inline comments label r3 == 0 as the
;						"CW" case, which reads opposite to the name bCullCW -
;						confirm the flag polarity against the callers.
; Output Argument:		r0 - int - Negative -> BackFacing
; Clobbers:				r1-r3, r12, flags (r4-r6 are saved and restored)
; Prototype in C:		HINT32 HXFCullTest(void*, void*, void*, HUINT32 bCullDirCW);
;** ************************************************************************ **
|HXFCullTest| PROC
	stmfd sp!, { r4-r6, lr }
	
	; The culling algorithm is basically a dot product of the camera to the 
	; surface normal. But, we have to generate the normal here from the 
	; triangle orientation. Since we are in clip space, the camera is 
	; [0,0,+/-1] depending on your LHR CCW or CW flag. This means we only 
	; really need the Z component of the face normal.

	; 1)  Create vectors
	ldr r4, [r0]		; ax
	ldr r5, [r1]		; bx
	ldr r0, [r0,#4]		; ay (r0 no longer points at vertex A)
	ldr r1, [r1,#4]		; by (r1 no longer points at vertex B)

	cmp r3, #0	; if 0==CW; !0==CCW. The flags set here drive every EQ/NE op below.
	
	; The only difference in computation is really which term (Ax or Ay) gets negated.
	; CW:	formula Nz.EyeZ = (AxBy - AyBx )  . [0,0,-1]
	; CW:					= (AyBx - AxBy)
	; CCW:	formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
	; CCW:					= (AxBy - AyBx)

	subeq r5, r4, r5		; CW: r5 = -Ax = -(bx - ax) = ax - bx
	subne r5, r5, r4		; CCW: r5 = Ax = bx - ax

	ldr r6, [r2]			; cx (LDR does not touch the flags)

	subeq r1, r1, r0		; CW: r1 = Ay = by - ay
	subne r1, r0, r1		; CCW: r1 = -Ay = -(by - ay) = ay - by

	ldr r2, [r2, #4]			; cy (r2 no longer points at vertex C)
	; r4 = ax
	; r0 = ay
	; r6 = cx
	; r2 = cy
	; r5 = -/+Ax	(CW/CCW)
	; r1 = +/-Ay

	sub r4, r6, r4			; Bx = cx - ax 
	sub r2, r2, r0			; By = cy - ay

	; r4 = Bx
	; r2 = By
	; r5 = -/+Ax	(CW/CCW)
	; r1 = +/-Ay
	
	; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything 
	;	 else, as camera in projected space is at the origin)
	smull r12, r0, r5, r2		; r0:r12 (hi:lo) = (-/+Ax) * By, signed 64-bit
	smlal r12, r0, r1, r4		; r0:r12 += (+/-Ay) * Bx, signed 64-bit accumulate

	; r0  = high 32 bits of Nz.EyeZ (carries the sign)
	; r12 = low 32 bits of Nz.EyeZ (discarded)

	; Since we only care about the sign, just look at hi word in r0
	; We expect the program to check for a negative!
	
	ldmfd sp!, { r4-r6, pc }
	ENDP
;** ************************************************************************ **



;** ************************************************************************ **
; Name:					HXFViewportTransform
; Description:			Loads x, y, z, w from pHPos. Unless w == HFX_ONE, computes
;						1/w with the HXF_ONEBITDIVIDE macro (defined outside this
;						file) and multiplies x, y, z by it. Then applies the
;						viewport scale and translate read from pState and stores
;						x, y, z and the 1/w value (w itself when w == HFX_ONE) to
;						pOutPos. When w == 0 nothing is written.
; Input Arguments: 		r0 - pState
;						r1 - pHPos   (four words: x, y, z, w)
;						r2 - pOutPos (four words written: x, y, z, 1/w)
; Output Argument:		None (results are written through r2)
; Clobbers:				r12 is untouched; r4-r11 are saved and restored.
;						wr0, wr1, wr2, wr15 are overwritten.
;						NOTE(review): wcgr0 is used as a shift count but is never
;						set here; the original register map lists wcgr0 = 16, so
;						it is presumably initialised by the caller - confirm.
; Prototype in C:		void HXFViewportTransform(HXFState* pState, void* pHPos, void* pOutPos);
;** ************************************************************************ **
|HXFViewportTransform| PROC
	stmfd sp!, { r4-r11, lr }
	ldr r5, [r1]			; x
	ldr r6, [r1, #4]		; y
	ldr r7, [r1, #8]		; z
	ldr r8, [r1, #12]		; w
	
	; Calculate inverse W -- r8 = w -> r8 = 1/w
	
	cmp r8, #HFX_ONE		; w == HFX_ONE: skip the divide, x/y/z/w are used as loaded
	beq HXFVIEWPORTTRANSFORM_VIEWPORT_TRANSFORM	

	; Check for division by 0. Nothing is stored to pOutPos in that case.
	cmp r8, #0
	beq HXFVIEWPORTTRANSFORM_EXIT

	; Normalize w: r10 = |w| (flags still come from the compare against 0)
	movge   r10, r8
	rsblt   r10, r8, #0

	clz		r14, r10			; r14 = number of leading zeros of |w|
	mov		r11, r10, lsl r14 ; setup denominator for the divide

    ; Clear out the result registers so we can mult & acc 32-bit values into them.
	mov r10, #0          ; Clear the result register
	mov r9, #0x80000000	 ; set up for the divide
	
	; Do the division computation
	HXF_ONEBITDIVIDE 31, r9, r11,  r10	; 1
	HXF_ONEBITDIVIDE 30, r9, r11,  r10	; 2
    HXF_ONEBITDIVIDE 29, r9, r11,  r10 	; 3
	HXF_ONEBITDIVIDE 28, r9, r11,  r10  ; 4
	
	HXF_ONEBITDIVIDE 27, r9, r11,  r10	; 5 
	HXF_ONEBITDIVIDE 26, r9, r11,  r10	; 6  	
	HXF_ONEBITDIVIDE 25, r9, r11,  r10	; 7  
	HXF_ONEBITDIVIDE 24, r9, r11,  r10	; 8  
	
	HXF_ONEBITDIVIDE 23, r9, r11,  r10	; 9  
	HXF_ONEBITDIVIDE 22, r9, r11,  r10	; 10  
	HXF_ONEBITDIVIDE 21, r9, r11,  r10	; 11  
	HXF_ONEBITDIVIDE 20, r9, r11,  r10	; 12  
	
	HXF_ONEBITDIVIDE 19, r9, r11,  r10	; 13  
	HXF_ONEBITDIVIDE 18, r9, r11,  r10	; 14  	
	HXF_ONEBITDIVIDE 17, r9, r11,  r10	; 15  
	HXF_ONEBITDIVIDE 16, r9, r11,  r10	; 16  

	; convert back to Fixed point	-- lz in r14	
	;	ResultExp = 0 - 15 - lz(w)
	;   Result Shift to FX = 7 - ResultExp
	;   Result Shift to FX = 7 - 0 - 15 - lz(w)
	;   Result Shift to FX = 22 -lz(w)

	rsb r14, r14, #22

	; Restore the proper sign to the 1/w result.
	; The sign test must read the ORIGINAL w, so it is done before r8 is
	; overwritten with the quotient; MOV without the S suffix keeps the flags.
	cmp r8, #0				; r8 still holds the input w here
	mov r8, r10, lsr r14	; r8 = |1/w| (8.24)
	rsblt r8, r8, #0		; w < 0  ->  1/w < 0

	mov r10, #24
	tinsrw wr15, r10, #0	; Setup for the Shift convert 24.40 to 16.16

	; Multiply the vector components by the inverse w.
    wzero	wr0
	tmia 	wr0, r5, r8		; r.x * 1/w
	wzero	wr1
	tmia 	wr1, r6, r8		; r.y * 1/w
	wzero 	wr2
	tmia 	wr2, r7, r8		; r.z * 1/w

    wsrad  wr0, wr0, wr15
    wsrad  wr1, wr1, wr15
	wsrad  wr2, wr2, wr15
	mov r8, r8, asr #8 ; Convert 1/w from 8.24 to 16.16 (arithmetic shift keeps the sign)

    ; Pack the results into two registers use signed saturation to clamp large 
    ; numbers.
    wpackdss wr0, wr0, wr1
    wpackdss wr2, wr2, wr14	; only word 0 of the result is read back below

    ; Move the results back into the appropriate ARM destination registers.
    tmrrc   r5, r6, wr0		; r5 = x/w, r6 = y/w
    textrmsw r7, wr2, #0    	; r7 = z/w

	; ---------------------------------------------------------------------- --
	; Register Map - Viewport Transform
	; ---------------------------------------------------------------------- --
	; r0  = pState           r5 = x              r10 = VP X scale, then VP Z scale
	; r1  = pHPos (done)     r6 = y              r11 = VP Y scale
	; r2  = pOutPos          r7 = z              r13 = sp
	; r8  = 1/w (16.16), or the input w when w == HFX_ONE
	; ---------------------------------------------------------------------- --
	; wr0 = x accumulator    wr1 = y accumulator    wr2 = z accumulator
	; wcgr0 = shift count, expected to be 16 (not set in this routine)
	; ---------------------------------------------------------------------- --
HXFVIEWPORTTRANSFORM_VIEWPORT_TRANSFORM	

	; Viewport transform: out = (trans << wcgr0 + scale * in) >> wcgr0
	ldrd    r10, [r0, #HXFSTATE_OFFSET_VIEWPORT_XS]        ; Load VPXScale & VPYScale

	wldrw	wr0, [r0, #HXFSTATE_OFFSET_VIEWPORT_XT]        ; Load VPXTrans
	wldrw	wr1, [r0, #HXFSTATE_OFFSET_VIEWPORT_YT]        ; Load VPYTrans
	wldrw	wr2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZT]        ; Load VPZTrans

	wslldg	wr0, wr0, wcgr0
	tmia	wr0, r10, r5
	wslldg	wr1, wr1, wcgr0
	ldr    	r10, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZS]        ; Load VP ZScale 
	tmia	wr1, r11, r6
	wslldg	wr2, wr2, wcgr0  
	tmia	wr2, r10, r7

	wsradg	wr0, wr0, wcgr0
	wsradg	wr1, wr1, wcgr0
	wsradg	wr2, wr2, wcgr0
	
	; Store the transformed x, y, z and the 1/w value to pOutPos
	wstrw wr0, [r2]
	wstrw wr1, [r2, #4]
	wstrw wr2, [r2, #8]
	str   r8,  [r2, #12]
	
HXFVIEWPORTTRANSFORM_EXIT
	ldmfd sp!, { r4-r11, pc }
	ENDP
;** ************************************************************************ **


;** ************************************************************************ **
	END

;/* ************************************************************************ *\
;** ************************************************************************ **	
;** EOF
;** ************************************************************************ **		
;\* ************************************************************************ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -