📄 hxflightarm.s
字号:
;/* ************************************************************************ *\
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 2003 Intel Corporation.
;** All Rights Reserved.
;**
;** ************************************************************************ **
; FILE: HXFLight.s
; DESCRIPTION: Optimized Vertex lighting routines.
; FIXME: Use the following to further optimize these routines.
; The issue latency of the WMUL and WMADD instructions is one cycle and the
; result and resource latency are two cycles. The second WMUL instruction in
; the following example stalls for one cycle due to the two cycle resource
; latency.
; WMULUM wR0, wR1, wR2
; WMULSL wR3, wR4, wR5
; The WADD instruction in the following example stalls for three cycles due
; to the dependency of the source operands.
; (refer to section 6.3.1 Data Hazards)
; WMULUM wR0, wR1, wR2
; WADDHUS wR1, wR0, wR2
;
; AUTHOR: Cian Montgomery
; CREATED: July 31, 2003
;
; $Date: 6/08/04 3:31p $ $Revision: 51 $
; $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFLightARM.s $
; *
; * 51 6/08/04 3:31p Cmdoan
; * Directional Light fix for Five Sphere's app.
; *
; * 50 6/08/04 8:00a Cmdoan
; * Check in; file synch
; *
; * 49 6/07/04 4:41p Cmdoan
; * Replaced HXFDirectionalLight*Proc "presets" with raw code, copied from
; * the Light_* procedures.
; *
; * 48 6/07/04 3:46p Cmdoan
; * Update for HXF_DirectionalLightProc_CM only, need to do spot and point
; Revision 1.11 2004/06/02 08:48:51 dab
; ogles20040601_V370_104_HXF
; *
; * 47 6/01/04 4:59p Cmdoan
; * Added 32.0 and 0.32 precision to DSqr and InvD
; *
; * 46 5/27/04 9:14a Cmdoan
; * added back diffuse wzero wr3 at line 1430
; *
; * 45 5/26/04 2:14p Clmontgo
; * Fixes for numerical overflows
; *
; * 44 5/21/04 9:59a Clmontgo
; *
; * 43 5/19/04 5:13p Clmontgo
; * Remove wMMX debug code
; *
; * 42 5/16/04 12:53p Clmontgo
; *
; * 41 4/06/04 11:34a Clmontgo
; * Remove Debug Code
; *
; * 40 4/06/04 11:32a Clmontgo
; * Fix For spot lighting.
; *
; * 39 4/06/04 10:11a Clmontgo
; * Fix Specular Lighting
; *
; * 38 4/05/04 6:59p Clmontgo
; * Fix for directional lighting
; *
; * 37 4/05/04 2:19p Clmontgo
; * Fixes for Light w/ w != 0 or 1 and Attenuation > 1.0
; *
; * 36 3/25/04 1:27p Clmontgo
; * Optimization of Clip flag generation and VP XForm. Fix for Clipping
; * issue observed in previous version.
; Revision 1.7 2004/03/22 11:43:40 bcb
; New Intel code drop. 22/03/04
; *
; * 35 3/14/04 7:52p Clmontgo
; * Fixes for Clipping, Tex Coords, Slaveport optimizations, Clip Flag
; * Generation , and Float To Fixed Conversions.
; *
; * 34 3/10/04 4:03p Cmdoan
; * fixed colorMaterial - bad color load per vertex
; *
; * 33 3/04/04 2:34p Clmontgo
; * Fix For Color Order RGBA vs ABGR
; *
; * 32 3/02/04 1:29p Cmdoan
; * adding 4 bits to divide accuracy
; *
; * 31 3/01/04 4:39p Cmdoan
; * new fix for l_sed.c which doesn't break normal checks
; *
; * 28 2/27/04 4:27p Clmontgo
; * Fix for UBYTE color swizzle in lighting
; *
; * 27 2/23/04 9:18a Clmontgo
; *
; * 26 2/20/04 11:30a Clmontgo
; * Delayed state validation update.
; *
; * 25 2/19/04 10:56a Cmdoan
; * trying to fix l_sed.c
; *
; * 24 2/18/04 10:45a Clmontgo
; * Fixed potential Slaveport issue and ARGB to RGBA color ording for byte
; * colors
; *
; * 23 2/13/04 9:10a Clmontgo
; * Fixed Sqrt Function, Fixed Normalize normals.
; *
; * 22 2/11/04 1:48p Cmdoan
; * checking in current spot fixes
; *
; * 21 2/05/04 6:00p Cmdoan
; * fixed bad clamp code (wrong word on textrc)
; *
; * 20 2/05/04 3:40p Clmontgo
; *
; * 19 2/05/04 1:40p Cmdoan
; * removing debug statements
; *
; * 18 2/05/04 11:58a Cmdoan
; * debugging attenuation position
; *
; * 16 2/04/04 9:45p Cmdoan
; * added clamp mechanisms prior to broadcasts in wMMX
; *
; * 15 2/04/04 5:43p Cmdoan
; * debugging 0.16 lights
; *
; * 14 2/04/04 11:01a Clmontgo
; * Initial Change of Lighting to 0.16 format.
; *
; * 13 1/20/04 12:03p Clmontgo
; * Fixed Scaling on Emissive Material
; *
; * 12 1/19/04 5:34p Clmontgo
; * Added Fog Functionality
;
; 11 12/17/03 9:20a Clmontgo
; Added Version ID and log to file headers
;\* ************************************************************************ */
INCLUDE HXFState.inc ; Definitions of the HXFState Structure
;** ************************************************************************ **
;** CONSTANTS
;** ************************************************************************ **
;** ************************************************************************ **
;** IMPORTS
;** ************************************************************************ **
;** ************************************************************************ **
;** EXPORTS
;** ************************************************************************ **
EXPORT |HXFLightVertex|
EXPORT |HXFLoadUINT8DiffuseColorProc|
EXPORT |HXFLoadPACKEDDiffuseColorProc|
EXPORT |HXFLoadFIXEDDiffuseColorProc|
EXPORT |HXFLoadFIXEDNormalProc|
EXPORT |HXFLoadINT8NormalProc|
EXPORT |HXFLoadINT16NormalProc|
EXPORT |HXFLoadPACKEDNormalProc|
EXPORT |HXFLoadNormalizeFIXEDNormalProc|
EXPORT |HXFLoadNormalizeINT8NormalProc|
EXPORT |HXFLoadNormalizeINT16NormalProc|
EXPORT |HXFDirectionalLightProc|
EXPORT |HXFDirectionalLightProc_CM|
EXPORT |HXFDirectionalLightProc_S|
EXPORT |HXFDirectionalLightProc_CMS|
EXPORT |HXFPointLightProc|
EXPORT |HXFPointLightProc_CM|
EXPORT |HXFPointLightProc_S|
EXPORT |HXFPointLightProc_CMS|
EXPORT |HXFSpotLightProc|
EXPORT |HXFSpotLightProc_CM|
EXPORT |HXFSpotLightProc_S|
EXPORT |HXFSpotLightProc_CMS|
;** ************************************************************************ **
;** VARIABLES
;** ************************************************************************ **
;** ************************************************************************ **
;** MACROS
;** ************************************************************************ **
;** ************************************************************************ **
;** FUNCTIONS
;** ************************************************************************ **
AREA HXFLIGHT, CODE, READONLY
;** ************************************************************************ **
; Name: Pow
; Description: compute x^y -- x must be fixed point [0,1]
; and y must be fixed point [0, 128]. Only the whole (integer) part of y
; is used; the fractional part of y is discarded.
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = x r4 = r8 = r12 =
; r1 = y r5 = r9 = r13 = sp
; r2 = tmp r6 = r10 = r14 = link
; r3 = tmp r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = wr4 = wr8 = wr12 = 0xFFFF0000
; wr1 = wr5 = wr9 = wr13 =
; wr2 = wr6 = wr10 = wr14 =
; wr3 = wr7 = wr11 = 0x04040404 wr15 =
; wcgr0 = wcgr1 = wcgr2 = wcgr3 =
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
; FIXME Make Macro
|Pow| PROC
        ; ------------------------------------------------------------------
        ; Pow: r0 = x^y using square-and-multiply on the whole part of y.
        ;   In:    r0 = x, fixed point, expected in [0, HFX_ONE]
        ;          r1 = y, fixed point, expected in [0, 128.0]
        ;   Out:   r0 = x^floor(y), truncated after every multiply
        ;   Clobb: r1, r2, r3, flags
        ; Values carry 16 fraction bits (every product is shifted right by
        ; 16). HFX_ONE is defined outside this file; presumably 0x10000
        ; (1.0) -- confirm against the include file.
        ; x is not range checked: the 32-bit MUL results only fit when
        ; x < 0x10000, which the early outs below guarantee for in-range x.
        ; ------------------------------------------------------------------

        ; catch early out cases
        ; y < 1.0 (signed compare, so negative y as well): x^0 -> 1
        cmp     r1, #HFX_ONE
        movlt   r0, #HFX_ONE
        movlt   pc, lr
        ; y < 2.0: whole part is 1, x^1 -> x (r0 already holds x)
        cmp     r1, #HFX_ONE <<1
        movlt   pc, lr
        ; 0^y -> 0 (r0 already holds 0)
        cmp     r0, #0
        moveq   pc, lr
        ; 1^y -> 1 (r0 already holds HFX_ONE, nothing to load)
        cmp     r0, #HFX_ONE
        moveq   pc, lr

        ; Compute whole portion of x^y
        mov     r1, r1, lsr #16         ; r1 = whole portion of y (>= 2 here)
        ; Saturate the exponent to 255, the largest value the 7-case ladder
        ; below can represent (implied leading bit 7 plus tested bits 6..0).
        ; Without this, a whole part >= 256 gives clz < 24, a negative skip
        ; count and a backward computed branch into the code above.
        cmp     r1, #0xFF
        movhi   r1, #0xFF
        clz     r2, r1                  ; r1 in [2,255] -> clz in [24,30]
        sub     r2, r2, #24             ; r2 = number of cases to skip (0..6)
        mov     r3, r0                  ; wres = x (covers the leading 1 bit of y)

        ; Dynamic branch to the proper case. Every case below MUST stay
        ; exactly 5 instructions (20 bytes). In ARM state pc reads as the
        ; address of the current instruction + 8, i.e. the first instruction
        ; of case 7, so the nop must stay directly after the add.
        add     r2, r2, r2, lsl #2      ; r2 = r2 * 5 (instructions per case)
        add     pc, pc, r2, lsl #2      ; pc = case 7 + r2 * 20 bytes
        nop                             ; pad due to pc increment

        ; Each case: wres = wres^2; if the tested bit of y is set,
        ; wres = wres * x. Results are renormalised with lsr #16.
        ; case 7 - y bit 6
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x40
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 6 - y bit 5
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x20
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 5 - y bit 4
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x10
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 4 - y bit 3
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x08
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 3 - y bit 2
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x04
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 2 - y bit 1
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x02
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 1 - y bit 0
        mul     r2, r3, r3              ; wres = wres * wres
        tst     r1, #0x01
        movne   r3, r2, lsr #16
        mulne   r2, r0, r3              ; wres = wres * in_x
        mov     r3, r2, lsr #16
        ; case 0 - already handled by the early outs above.
        mov     r0, r3                  ; return wres
        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: PointLight
; Description:
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = r8 = r12 = pOutVtx
; r1 = flags r5 = r9 = r13 = sp
; r2 = r6 = r10 = r14 = link
; r3 = r7 = r11 = pLight r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D) wr4 = wr8 = VL wr12 = 0xFFFF0000
; wr1 = out spec(S) wr5 = wr9 = Att wr13 =
; wr2 = vtx Norm(N) wr6 = wr10 = wr14 = Zero
; wr3 = vtx Diff(vD) wr7 = wr11 = 0x04040404 wr15 =
; wcgr0 = 16 wcgr1 = 12 wcgr2 = 8 wcgr3 = 32
; ---------------------------------------------------------------------- --
; Prototype in C: N/A
;** ************************************************************************ **
|PointLight| PROC
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = Vtx.z r8 = lPos.z r12 = pOutVtx
; r1 = flags r5 = Vtx.w r9 = lPos.w r13 = sp
; r2 = Vtx.x r6 = lPos.x r10 = r14 = link
; r3 = Vtx.y r7 = lpos.y r11 = pLight r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D) wr4 = wr8 = VL wr12 = 0xFFFF0000
; wr1 = out spec(S) wr5 = wr9 = Att wr13 =
; wr2 = vtx Norm(N) wr6 = wr10 = wr14 = Zero
; wr3 = vtx Diff(vD)wr7 = wr11 = 0x04040404wr15 =
; wcgr0 = 16 wcgr1 = 12 wcgr2 = 8 wcgr3 = 32
; ---------------------------------------------------------------------- --
; Calculate DSqr
ldrd r2, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_X] ; Load the Vertex X, Y
ldrd r4, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_Z] ; Load the Vertex Z, W
ldrd r6, [r11, #HXFLIGHT_OFFSET_POSITION]
ldrd r8, [r11, #HXFLIGHT_OFFSET_POSITION + 8]
wzero wr4
tmia wr4, r6, r5 ; l.x * v.w
wzero wr5
tmia wr5, r7, r5 ; l.y * v.w
wzero wr6
tmia wr6, r8, r5 ; l.z * v.w
tmia wr4, r2, r9 ; v.x * l.w
tmia wr5, r3, r9 ; v.y * l.w
tmia wr6, r4, r9 ; v.z * l.w
; Load Light Exponent
wsradg wr4, wr4, wcgr0 ; normalization of LV.x
wsradg wr5, wr5, wcgr0 ; normalization of LV.y
wsradg wr6, wr6, wcgr0 ; normalization of LV.z
textrmsw r2, wr4, #0 ; r2 = lv.x
textrmsw r3, wr5, #0 ; r3 = lv.y
textrmsw r4, wr6, #0 ; r4 = lv.z
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = VL.z r8 = r12 = pOutVtx
; r1 = flags r5 = DSqr r9 = r13 = sp
; r2 = VL.x r6 = r10 = r14 = link
; r3 = VL.y r7 = r11 = pLight r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D) wr4 = wr8 = VL wr12 = 0xFFFF0000
; wr1 = out spec(S) wr5 = wr9 = Att wr13 =
; wr2 = vtx Norm(N) wr6 = wr10 = wr14 = Zero
; wr3 = vtx Diff(vD)wr7 = wr11 = 0x04040404wr15 =
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -