; hxfutilsarm.s
;/* ************************************************************************ *\
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 2003 Intel Corporation.
;** All Rights Reserved.
;**
;** ************************************************************************ **
;** FILE: HXFUtilsARM.s
;** DESCRIPTION: Optimized utility routines: cache preload helpers, cull test, and viewport transform.
;**
;** AUTHOR: Cian Montgomery
;** CREATED: July 31, 2003
;**
; * $Date: 5/28/04 10:21a $ $Revision: 15 $
; * $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFUtilsARM.s $
; *
; * 15 5/28/04 10:21a Clmontgo
; * Extened 1/w presicion in Clipper
; *
; * 14 5/27/04 1:54p Clmontgo
; * Fixed point enhancements.
; *
; * 13 3/25/04 1:27p Clmontgo
; * Optimization of Clip flag generation and VP XForm. Fix for Clipping
; * issue observed in previous version.
; * Revision 1.7 2004/03/22 11:43:42 bcb
; * New Intel code drop. 22/03/04
; *
; * 12 3/19/04 2:11p Clmontgo
; * Moved VP xform to after clip and 1/w
; *
; * 11 3/18/04 10:37p Clmontgo
; * Fixes for Clipping and SP clean up
; *
; * 10 3/14/04 7:53p Clmontgo
; * Fixes for Clipping, Tex Coords, Slaveport optimizations, Clip Flag
; * Generation , and Float To Fixed Conversions.
; *
; * 9 2/03/04 8:37p Clmontgo
; * Slaveport Rewrite and partial Fix(HACK)
; *
; * 8 1/30/04 8:53a Clmontgo
;
; 7 12/21/03 12:59p Clmontgo
;
; 6 12/17/03 9:22a Clmontgo
; Added Version ID and log to file headers
;\* ************************************************************************ */
INCLUDE HXFState.inc ; Definitions of the HXFState Structure
;** ************************************************************************ **
;** CONSTANTS
;** ************************************************************************ **
;** ************************************************************************ **
;** EXPORTS
;** ************************************************************************ **
EXPORT |HXFPreload1|
EXPORT |HXFPreload2|
EXPORT |HXFPreload3|
EXPORT |HXFPreload4|
EXPORT |HXFPreload1_2L|
EXPORT |HXFPreload2_2L|
EXPORT |HXFPreload3_2L|
EXPORT |HXFPreload4_2L|
EXPORT |HXFViewportTransform|
EXPORT |HXFCullTest|
;** ************************************************************************ **
;** VARIABLES
;** ************************************************************************ **
;** ************************************************************************ **
;** MACROS
;** ************************************************************************ **
;** ************************************************************************ **
;** FUNCTIONS
;** ************************************************************************ **
AREA .text, CODE, READONLY
;** ************************************************************************ **
; Name: HXFPreload1
; Description: Issues a cache preload hint (pld) for one address and returns.
; Input Arguments: r0 - address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload1(void*);
;** ************************************************************************ **
|HXFPreload1| PROC
        pld     [r0, #0]                ; preload hint for the 1st argument
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload2
; Description: Issues a cache preload hint (pld) for each of two addresses.
; Input Arguments: r0 - first address to preload
;                  r1 - second address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload2(void*, void*);
;** ************************************************************************ **
|HXFPreload2| PROC
        pld     [r0, #0]                ; preload hint for the 1st argument
        pld     [r1, #0]                ; preload hint for the 2nd argument
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload3
; Description: Issues a cache preload hint (pld) for each of three addresses.
; Input Arguments: r0 - first address to preload
;                  r1 - second address to preload
;                  r2 - third address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload3(void*, void*, void*);
;** ************************************************************************ **
|HXFPreload3| PROC
        pld     [r0, #0]                ; preload hint for the 1st argument
        pld     [r1, #0]                ; preload hint for the 2nd argument
        pld     [r2, #0]                ; preload hint for the 3rd argument
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload4
; Description: Issues a cache preload hint (pld) for each of four addresses.
; Input Arguments: r0 - first address to preload
;                  r1 - second address to preload
;                  r2 - third address to preload
;                  r3 - fourth address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload4(void*, void*, void*, void*);
;** ************************************************************************ **
|HXFPreload4| PROC
        pld     [r0, #0]                ; preload hint for the 1st argument
        pld     [r1, #0]                ; preload hint for the 2nd argument
        pld     [r2, #0]                ; preload hint for the 3rd argument
        pld     [r3, #0]                ; preload hint for the 4th argument
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload1_2L
; Description: Issues two cache preload hints (pld) for one pointer: one at
;              the pointer itself and one 16 bytes past it.
;              NOTE(review): "_2L" presumably stands for "two lines"; confirm
;              that a 16-byte stride matches the target's cache line size.
; Input Arguments: r0 - base address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload1_2L(void*);
;** ************************************************************************ **
|HXFPreload1_2L| PROC
        pld     [r0, #0]                ; 1st argument, base address
        pld     [r0, #16]               ; 1st argument, base + 16 bytes
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload2_2L
; Description: For each of two pointers, issues two cache preload hints
;              (pld): one at the pointer and one 16 bytes past it.
;              NOTE(review): "_2L" presumably stands for "two lines"; confirm
;              that a 16-byte stride matches the target's cache line size.
; Input Arguments: r0 - first base address to preload
;                  r1 - second base address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload2_2L(void*, void*);
;** ************************************************************************ **
|HXFPreload2_2L| PROC
        pld     [r0, #0]                ; 1st argument, base address
        pld     [r0, #16]               ; 1st argument, base + 16 bytes
        pld     [r1, #0]                ; 2nd argument, base address
        pld     [r1, #16]               ; 2nd argument, base + 16 bytes
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload3_2L
; Description: For each of three pointers, issues two cache preload hints
;              (pld): one at the pointer and one 16 bytes past it.
;              NOTE(review): "_2L" presumably stands for "two lines"; confirm
;              that a 16-byte stride matches the target's cache line size.
; Input Arguments: r0 - first base address to preload
;                  r1 - second base address to preload
;                  r2 - third base address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload3_2L(void*, void*, void*);
;** ************************************************************************ **
|HXFPreload3_2L| PROC
        pld     [r0, #0]                ; 1st argument, base address
        pld     [r0, #16]               ; 1st argument, base + 16 bytes
        pld     [r1, #0]                ; 2nd argument, base address
        pld     [r1, #16]               ; 2nd argument, base + 16 bytes
        pld     [r2, #0]                ; 3rd argument, base address
        pld     [r2, #16]               ; 3rd argument, base + 16 bytes
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFPreload4_2L
; Description: For each of four pointers, issues two cache preload hints
;              (pld): one at the pointer and one 16 bytes past it.
;              NOTE(review): "_2L" presumably stands for "two lines"; confirm
;              that a 16-byte stride matches the target's cache line size.
; Input Arguments: r0 - first base address to preload
;                  r1 - second base address to preload
;                  r2 - third base address to preload
;                  r3 - fourth base address to preload
; Output Argument: None (no register other than pc is written)
; Prototype in C: void HXFPreload4_2L(void*, void*, void*, void*);
;** ************************************************************************ **
|HXFPreload4_2L| PROC
        pld     [r0, #0]                ; 1st argument, base address
        pld     [r0, #16]               ; 1st argument, base + 16 bytes
        pld     [r1, #0]                ; 2nd argument, base address
        pld     [r1, #16]               ; 2nd argument, base + 16 bytes
        pld     [r2, #0]                ; 3rd argument, base address
        pld     [r2, #16]               ; 3rd argument, base + 16 bytes
        pld     [r3, #0]                ; 4th argument, base address
        pld     [r3, #16]               ; 4th argument, base + 16 bytes
        mov     pc, lr                  ; return to the caller
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFCullTest
; Description: Returns the high 32 bits of a signed 64-bit 2D cross product
;              built from the x/y words of three vertices. The caller tests
;              the result for negative to detect a back-facing triangle.
; Input Arguments: r0 - pVtxA (x read at offset 0, y at offset 4)
; r1 - pVtxB (x read at offset 0, y at offset 4)
; r2 - pVtxC (x read at offset 0, y at offset 4)
; r3 - bCullCW (compared against 0 to select which term is negated)
; Output Argument: r0 - int - Negative -> BackFacing
; Registers: r4-r6 and lr are pushed on entry and popped on exit (lr into pc);
;            r1, r2 and r12 are overwritten as scratch.
; Prototype in C: HINT32 HXFCullTest(void*, void*, void*, HUINT32 bCullDirCW);
;** ************************************************************************ **
|HXFCullTest| PROC
stmfd sp!, { r4-r6, lr }
; The culling algorithm is basically a dot product of the camera direction
; with the surface normal. But, we have to generate the normal here from the
; triangle orientation. Since we are in clip space, the camera is
; [0,0,+/-1] depending on your LHR CCW or CW flag. This means we only
; really need the Z component of the face normal.
; 1) Create vectors (A = b - a, B = c - a). The loads are interleaved with
;    the subtracts that consume earlier loads.
ldr r4, [r0] ; ax
ldr r5, [r1] ; bx
ldr r0, [r0,#4] ; ay (r0 no longer holds pVtxA after this)
ldr r1, [r1,#4] ; by (r1 no longer holds pVtxB after this)
cmp r3, #0 ; r3 == 0 -> eq path ("CW" formula); r3 != 0 -> ne path ("CCW" formula)
; NOTE(review): the eq (r3 == 0) path is the one labelled CW below, which reads
; as the opposite of the parameter name bCullCW -- confirm against the callers.
; The only difference in computation is really which term (Ax or Ay) gets negated.
; CW: formula Nz.EyeZ = (AxBy - AyBx ) . [0,0,-1]
; CW: = (AyBx - AxBy)
; CCW: formula Nz.EyeZ = AxBy - AyBx . [0,0,1]
; CCW: = (AxBy - AyBx)
subeq r5, r4, r5 ; CW: -Ax = -(bx - ax ) = ax - bx
subne r5, r5, r4 ; CCW: Ax = bx - ax ;
ldr r6, [r2] ; cx
subeq r1, r1, r0 ; CW: Ay = by - ay
subne r1, r0, r1 ; CCW: -Ay = -(by - ay) = ay -by ;
ldr r2, [r2, #4] ; cy (r2 no longer holds pVtxC after this)
; r4 = ax
; r0 = ay
; r6 = cx
; r2 = cy
; r5 = -/+Ax (CW/CCW)
; r1 = +/-Ay
sub r4, r6, r4 ; Bx = cx - ax
sub r2, r2, r0 ; By = cy - ay
; r4 = Bx
; r2 = By
; r5 = -/+Ax (CW/CCW)
; r1 = +/-Ay
; 2) Cross A into B to get Nz ONLY, then compare sign (don't need anything
; else, as camera in projected space is at the origin)
smull r12, r0, r5, r2 ; r0:r12 = (-/+Ax) * By as a signed 64-bit product (r0 = high word)
smlal r12, r0, r1, r4 ; r0:r12 += (+/-Ay) * Bx, signed 64-bit accumulate
; r0 = high 32 bits of Nz.EyeZ
; r12 = low 32 bits of Nz.EyeZ
; Since we only care about the sign, just look at hi word in r0
; We expect the program to check for a negative!
ldmfd sp!, { r4-r6, pc } ; restore r4-r6 and return (saved lr is popped into pc)
ENDP
;** ************************************************************************ **
;** ************************************************************************ **
; Name: HXFViewportTransform
; Description: Reads a homogeneous position (x, y, z, w), computes 1/w and
;              scales x, y, z by it (this step is skipped when w == HFX_ONE),
;              applies the viewport scale/translate held in pState, and
;              stores x, y, z and 1/w to pOutPos. When w == 0 the routine
;              returns without writing anything.
; Input Arguments: r0 - pState  (HXFState*, viewport scale/translate fields)
;                  r1 - pHPos   (four 32-bit words: x, y, z, w)
;                  r2 - pOutPos (receives four 32-bit words: x, y, z, 1/w)
; Output Argument: None
; Registers: r4-r11 and lr are pushed on entry and restored on exit.
;            wr0, wr1, wr2 and the low word of wr15 are overwritten.
; Assumptions (not established in this file -- confirm):
;   - HFX_ONE, HXFSTATE_OFFSET_* and the HXF_ONEBITDIVIDE macro presumably
;     come from HXFState.inc.
;   - wcgr0 already holds 16 on entry (see the register map below).
;   - Inputs are presumably 16.16 fixed point, as the shift comments imply.
; Prototype in C: void HXFViewportTransform(HXFState* pState, void* pHPos, void* pOutPos);
;** ************************************************************************ **
|HXFViewportTransform| PROC
        stmfd   sp!, { r4-r11, lr }
        ldr     r5, [r1]                ; x
        ldr     r6, [r1, #4]            ; y
        ldr     r7, [r1, #8]            ; z
        ldr     r8, [r1, #12]           ; w
        ; Calculate inverse W -- r8 = w -> r8 = 1/w
        ; When w == HFX_ONE the divide is skipped and w itself is stored as 1/w.
        cmp     r8, #HFX_ONE
        beq     HXFVIEWPORTTRANSFORM_VIEWPORT_TRANSFORM
        ; Check for division by 0.
        cmp     r8, #0
        beq     HXFVIEWPORTTRANSFORM_EXIT
        ; Normalize w: r10 = |w| (flags still come from the cmp against 0),
        ; r14 = leading-zero count, r11 = |w| shifted so its top bit is set.
        movge   r10, r8
        rsblt   r10, r8, #0
        clz     r14, r10
        mov     r11, r10, lsl r14       ; setup denominator for the divide
        ; Clear out the result registers so we can mult & acc 32-bit values into them.
        mov     r10, #0                 ; Clear the result register
        mov     r9, #0x80000000         ; set up for the divide
        ; Do the division computation (16 steps; the macro body is not in this file)
        HXF_ONEBITDIVIDE 31, r9, r11, r10 ; 1
        HXF_ONEBITDIVIDE 30, r9, r11, r10 ; 2
        HXF_ONEBITDIVIDE 29, r9, r11, r10 ; 3
        HXF_ONEBITDIVIDE 28, r9, r11, r10 ; 4
        HXF_ONEBITDIVIDE 27, r9, r11, r10 ; 5
        HXF_ONEBITDIVIDE 26, r9, r11, r10 ; 6
        HXF_ONEBITDIVIDE 25, r9, r11, r10 ; 7
        HXF_ONEBITDIVIDE 24, r9, r11, r10 ; 8
        HXF_ONEBITDIVIDE 23, r9, r11, r10 ; 9
        HXF_ONEBITDIVIDE 22, r9, r11, r10 ; 10
        HXF_ONEBITDIVIDE 21, r9, r11, r10 ; 11
        HXF_ONEBITDIVIDE 20, r9, r11, r10 ; 12
        HXF_ONEBITDIVIDE 19, r9, r11, r10 ; 13
        HXF_ONEBITDIVIDE 18, r9, r11, r10 ; 14
        HXF_ONEBITDIVIDE 17, r9, r11, r10 ; 15
        HXF_ONEBITDIVIDE 16, r9, r11, r10 ; 16
        ; convert back to Fixed point -- lz in r14
        ; Result Shift to FX = 22 - lz(w)   (original author's derivation)
        rsb     r14, r14, #22
        ; Restore the proper sign to the w result.
        ; The sign must be taken from the ORIGINAL w, so test r8 before it is
        ; overwritten. The two mov instructions below have no S suffix, so the
        ; flags set here are still valid for the conditional negates.
        cmp     r8, #0
        mov     r8, r10, lsr r14        ; r8 = |1/w| in 8.24
        mov     r9, r8, lsr #8          ; r9 = |1/w| in 16.16 (r9 is free after the divide)
        rsblt   r8, r8, #0              ; w < 0: negate the 8.24 reciprocal
        rsblt   r9, r9, #0              ; w < 0: negate the 16.16 reciprocal
        mov     r10, #24
        tinsrw  wr15, r10, #0           ; Setup for the Shift convert 24.40 to 16.16
        ; Multiply the vector components by the inverse w.
        wzero   wr0
        tmia    wr0, r5, r8             ; r.x * 1/w
        wzero   wr1
        tmia    wr1, r6, r8             ; r.y * 1/w
        wzero   wr2
        tmia    wr2, r7, r8             ; r.z * 1/w
        wsrad   wr0, wr0, wr15
        wsrad   wr1, wr1, wr15
        wsrad   wr2, wr2, wr15
        mov     r8, r9                  ; 1/w in 16.16, sign already applied
        ; Pack the results into two registers use signed saturation to clamp large
        ; numbers.
        wpackdss wr0, wr0, wr1
        wpackdss wr2, wr2, wr14         ; only word 0 of the result is read back below
        ; Move the results back into the appropriate ARM destination registers.
        tmrrc   r5, r6, wr0             ; r5 = x/w, r6 = y/w
        textrmsw r7, wr2, #0            ; r7 = z/w
; ---------------------------------------------------------------------- --
; Register Map - Viewport Transform
; NOTE(review): this map appears to describe a wider calling context (for
; example r1 = flags, r12 = pOutVtx do not match this routine's arguments).
; Of its entries, this routine reads wcgr0 and wr14 without setting them.
; ---------------------------------------------------------------------- --
; r0 = pState   r4 =        r8 = w      r12 = pOutVtx
; r1 = flags    r5 =        r9 =        r13 = sp
; r2 =          r6 =        r10 =       r14 = wExp
; r3 =          r7 =        r11 =       r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = v.x     wr4 =       wr8 =       wr12 = 0xFFFF0000
; wr1 = v.y     wr5 =       wr9 =       wr13 = zExt
; wr2 = v.z     wr6 =       wr10 =      wr14 = Zero
; wr3 = v.w     wr7 =       wr11 = 0x04040404   wr15 =
; wcgr0 = 16    wcgr1 = 12  wcgr2 = 8   wcgr3 = 32
; ---------------------------------------------------------------------- --
HXFVIEWPORTTRANSFORM_VIEWPORT_TRANSFORM
        ; Viewport transform: out = translate + scale * component for x, y, z.
        ; Each translate is shifted left by wcgr0, the product is accumulated
        ; on top of it, and the sum is shifted back right by wcgr0.
        ldrd    r10, [r0, #HXFSTATE_OFFSET_VIEWPORT_XS] ; Load VPXScale & VPYScale (r10, r11)
        wldrw   wr0, [r0, #HXFSTATE_OFFSET_VIEWPORT_XT] ; Load VPXTrans
        wldrw   wr1, [r0, #HXFSTATE_OFFSET_VIEWPORT_YT] ; Load VPYTrans
        wldrw   wr2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZT] ; Load VPZTrans
        wslldg  wr0, wr0, wcgr0
        tmia    wr0, r10, r5            ; += VPXScale * x
        wslldg  wr1, wr1, wcgr0
        ldr     r10, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZS] ; Load VP ZScale (r10 reused once XScale is consumed)
        tmia    wr1, r11, r6            ; += VPYScale * y
        wslldg  wr2, wr2, wcgr0
        tmia    wr2, r10, r7            ; += VPZScale * z
        wsradg  wr0, wr0, wcgr0
        wsradg  wr1, wr1, wcgr0
        wsradg  wr2, wr2, wcgr0
        ; Store the output vertices - Only in Ortho case do we need to store verts
        wstrw   wr0, [r2]               ; out.x
        wstrw   wr1, [r2, #4]           ; out.y
        wstrw   wr2, [r2, #8]           ; out.z
        str     r8, [r2, #12]           ; out.w = 1/w (or w itself when w == HFX_ONE)
HXFVIEWPORTTRANSFORM_EXIT
        ldmfd   sp!, { r4-r11, pc }     ; restore registers and return
        ENDP
;** ************************************************************************ **
;** ************************************************************************ **
END
;/* ************************************************************************ *\
;** ************************************************************************ **
;** EOF
;** ************************************************************************ **
;\* ************************************************************************ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -