📄 hxflightarm.s
字号:
; Compute attenuated value and saturate it.
wmulum wr6, wr6, wr9 ; Scale by Att
wmulul wr7, wr6, wr9 ; Scale by Att
wunpckihh wr15, wr7, wr6
wunpckilh wr13, wr7, wr6
wsrlw wr15, wr15, wr10
wsrlw wr13, wr13, wr10
wpackwus wr6, wr13, wr15
bl LightSpecular ;
; ************* Accumulate to Output diffuse
LIGHT_CMS_ACCDIFFUSE
waddhus wr0, wr0, wr6 ; Accumulate diffuse contribution from this light
b NEXT_LIGHT
ENDP
;** ************************************************************************ **
; Name: Compute*Light_*
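; Description: Per-light entry points, one PROC per light type and variant
;              (plain, _CM, _S, _CMS). The directional procs accumulate the
;              light's diffuse contribution into wr0 and branch to NEXT_LIGHT;
;              the point and spot procs call PointLight (and SpotEffect) and
;              then branch to the matching Light* routine.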
; ---------------------------------------------------------------------- --
; Register Map - Light Preamble
; ---------------------------------------------------------------------- --
; r0 = pState   r4 =             r8  =          r12 = pOutVtx
; r1 = Flags    r5 = Input Fmt   r9  =          r13 = sp
; r2 =          r6 = Load Addr   r10 =          r14 = link
; r3 =          r7 = Preserve    r11 = pLight   r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D)    wr4 =   wr8  = VL           wr12 = 0xFFFF0000
; wr1 = out spec(S)    wr5 =   wr9  = Att(0.16)    wr13 =
; wr2 = vtx Norm(N)    wr6 =   wr10 =              wr14 = Zero
; wr3 = vtx Diff(vD)   wr7 =   wr11 = 0x04040404   wr15 =
; wcgr0 = 16   wcgr1 = 12   wcgr2 = 8   wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
|HXFDirectionalLightProc| PROC
        ; Directional light, plain variant: adds this light's ambient and
        ; diffuse contribution to the output diffuse accumulator.
        ;
        ; Reads:    r11 (pLight), wr2 (vertex normal N), wr3 (vertex diffuse vD),
        ;           wr12 (0xFFFF0000 mask), wcgr0, wcgr1
        ; Writes:   wr0 (output diffuse, unsigned saturating halfword add),
        ;           wr6, wr7, wr8, wr13, wr15, condition flags
        ; Exit:     branches to NEXT_LIGHT
        ;
        ; ************* Compute Light Diffuse Contributions
        ; Available - r2-r9, r12, r14
        wldrd   wr13, [r11, #HXFLIGHT_OFFSET_AMBIENT]
        wldrd   wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]
        ; The _CM, _S and _CMS variants of this proc load VL from the light
        ; before the dot product; without this load wr8 holds whatever an
        ; earlier light left behind.
        ; NOTE(review): confirm no caller relied on a preloaded wr8 here.
        wldrd   wr8, [r11, #HXFLIGHT_OFFSET_DIRECTION]  ; Load VL

        ; ************* Compute Light Ambient Contribution
        ; wmulum keeps the upper 16 bits of each unsigned halfword product.
        wmulum  wr6, wr3, wr13          ; Light Ambient * Vertex Ambient

        ; ************* Compute Light Diffuse Contribution
        wmacsz  wr13, wr8, wr2          ; LdotN = VL dot N
        ; Convert LdotN to 0.16 and pack a full register with 16 bit values
        ; currently in a 4.28 format from dp of 2.14*2.14->4.28
        wsrawg  wr13, wr13, wcgr1       ; convert to 0.16 - shift right by 12
        textrcw r15, #0                 ; Check for LdotN <= Zero -> max(LdotN, 0)
        ble     HXFDIRECTIONALLIGHTPROC_ACCDIFFUSE ; if LdotN is <= 0 then skip remaining calcs (ambient only)
        wand    wr15, wr13, wr12        ; wr13 AND 0xFFFF0000 test for >= 1.0
        textrcw r15, #1                 ; flags of the wand result -> CPSR
        wsrlwgne wr13, wr12, wcgr0      ; Saturate > 1.0: LdotN = mask >> 16
        wshufh  wr13, wr13, #00         ; replicate the value to all 4 halfword slots

        ; ** LC = Light*Mat
        wmulum  wr7, wr3, wr7           ; Light Diffuse * Vertex Diffuse
        ; ** LC = C*LdotN
        wmulum  wr7, wr7, wr13          ; Scale by LdotN
        waddhus wr6, wr6, wr7           ; Accumulate in to Lights current diffuse

        ; ************* Accumulate to Output diffuse
HXFDIRECTIONALLIGHTPROC_ACCDIFFUSE
        waddhus wr0, wr0, wr6           ; Accumulate diffuse contribution from this light
        b       NEXT_LIGHT
        ENDP
|HXFDirectionalLightProc_CM| PROC
; Directional light, _CM variant: adds this light's ambient and diffuse
; contribution to the output diffuse accumulator. Unlike
; HXFDirectionalLightProc, the light colours are used as loaded and are not
; multiplied by the vertex colour in wr3.
;
; Reads:    r11 (pLight), wr2 (vertex normal N), wr12 (0xFFFF0000 mask),
;           wcgr0, wcgr1
; Writes:   wr0 (output diffuse, unsigned saturating halfword add),
;           wr6, wr7, wr8, wr13, wr15, condition flags
; Exit:     branches to NEXT_LIGHT
;
; ************* Compute Light Diffuse Contributions
; Available - r2-r9
wldrd wr6, [r11, #HXFLIGHT_OFFSET_AMBIENT] ; Initialize light diffuse with light ambient
wldrd wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]
wldrd wr8, [r11, #HXFLIGHT_OFFSET_DIRECTION] ; Load VL
; ************* Compute Light Diffuse Contribution
wmacsz wr13, wr8, wr2 ; LdotN = VL dot N
; Convert LdotN to 0.16 and pack a full register with 16 bit values
; currently in a 4.28 format from dp of 2.14*2.14->4.28
wsrawg wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12
textrcw r15, #0 ; Check for LdotN <= Zero -> max(LdotN, 0)
ble HXFDIRECTIONALLIGHTPROC_CM_ACCDIFFUSE ; if LdotN is <= 0 then skip remaining calcs (ambient only)
wand wr15, wr13, wr12 ; wr13 AND 0xFFFF0000 test for >= 1.0
textrcw r15, #1 ; flags of the wand result -> CPSR
wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0: LdotN = mask >> 16
wshufh wr13, wr13, #00 ; replicate the value to all 4 halfword slots
; ** LC = C*LdotN
; wmulum keeps the upper 16 bits of each unsigned halfword product.
wmulum wr7, wr7, wr13 ; Scale by LdotN
waddhus wr6, wr6, wr7 ; Accumulate in to Lights current diffuse
; ************* Accumulate to Output diffuse
HXFDIRECTIONALLIGHTPROC_CM_ACCDIFFUSE
waddhus wr0, wr0, wr6 ; Accumulate diffuse contribution from this light
b NEXT_LIGHT
ENDP
|HXFDirectionalLightProc_S| PROC
; Directional light, _S variant: same ambient and diffuse computation as
; HXFDirectionalLightProc (light colours multiplied by the vertex colour in
; wr3), followed by a call to DirLightSpecular when LdotN > 0.
;
; Reads:    r11 (pLight), wr2 (vertex normal N), wr3 (vertex diffuse vD),
;           wr12 (0xFFFF0000 mask), wcgr0, wcgr1
; Writes:   wr0 (output diffuse, unsigned saturating halfword add),
;           wr6, wr7, wr8, wr13, wr15, condition flags, r14 (by the bl),
;           plus whatever DirLightSpecular modifies (defined outside this view)
; Exit:     branches to NEXT_LIGHT
;
; ************* Compute Light Diffuse Contributions
; Available - r2-r9, r12, r14
wldrd wr13, [r11, #HXFLIGHT_OFFSET_AMBIENT]
wldrd wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]
wldrd wr8, [r11, #HXFLIGHT_OFFSET_DIRECTION] ; Load VL
; ************* Compute Light Ambient Contribution
; wmulum keeps the upper 16 bits of each unsigned halfword product.
wmulum wr6, wr3, wr13 ; Light Ambient * Vertex Ambient
; ************* Compute Light Diffuse Contribution
wmacsz wr13, wr8, wr2 ; LdotN = VL dot N
; Convert LdotN to 0.16 and pack a full register with 16 bit values
; currently in a 4.28 format from dp of 2.14*2.14->4.28
wsrawg wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12
textrcw r15, #0 ; Check for LdotN <= Zero -> max(LdotN, 0)
ble HXFDIRECTIONALLIGHTPROC_S_ACCDIFFUSE ; if LdotN is <= 0 then skip remaining calcs (incl. specular)
wand wr15, wr13, wr12 ; wr13 AND 0xFFFF0000 test for >= 1.0
textrcw r15, #1 ; flags of the wand result -> CPSR
wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0: LdotN = mask >> 16
wshufh wr13, wr13, #00 ; replicate the value to all 4 halfword slots
; ** LC = Light*Mat
wmulum wr7, wr3, wr7 ; Light Diffuse * Vertex Diffuse
; ** LC = C*LdotN
wmulum wr7, wr7, wr13 ; Scale by LdotN
waddhus wr6, wr6, wr7 ; Accumulate in to Lights current diffuse
bl DirLightSpecular ; specular term; returns here and falls through to the accumulate
; ************* Accumulate to Output diffuse
HXFDIRECTIONALLIGHTPROC_S_ACCDIFFUSE
waddhus wr0, wr0, wr6 ; Accumulate diffuse contribution from this light
b NEXT_LIGHT
ENDP
|HXFDirectionalLightProc_CMS| PROC
; Directional light, _CMS variant: same ambient and diffuse computation as
; HXFDirectionalLightProc_CM (light colours used as loaded, not multiplied by
; the vertex colour), followed by a call to DirLightSpecular when LdotN > 0.
;
; Reads:    r11 (pLight), wr2 (vertex normal N), wr12 (0xFFFF0000 mask),
;           wcgr0, wcgr1
; Writes:   wr0 (output diffuse, unsigned saturating halfword add),
;           wr6, wr7, wr8, wr13, wr15, condition flags, r14 (by the bl),
;           plus whatever DirLightSpecular modifies (defined outside this view)
; Exit:     branches to NEXT_LIGHT
;
; ************* Compute Light Diffuse Contributions
; Available - r2-r9
wldrd wr6, [r11, #HXFLIGHT_OFFSET_AMBIENT] ; Initialize light diffuse with light ambient
wldrd wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]
wldrd wr8, [r11, #HXFLIGHT_OFFSET_DIRECTION] ; Load VL
; ************* Compute Light Diffuse Contribution
wmacsz wr13, wr8, wr2 ; LdotN = VL dot N
; Convert LdotN to 0.16 and pack a full register with 16 bit values
; currently in a 4.28 format from dp of 2.14*2.14->4.28
wsrawg wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12
textrcw r15, #0 ; Check for LdotN <= Zero -> max(LdotN, 0)
ble HXFDIRECTIONALLIGHTPROC_CMS_ACCDIFFUSE ; if LdotN is <= 0 then skip remaining calcs (incl. specular)
wand wr15, wr13, wr12 ; wr13 AND 0xFFFF0000 test for >= 1.0
textrcw r15, #1 ; flags of the wand result -> CPSR
wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0: LdotN = mask >> 16
wshufh wr13, wr13, #00 ; replicate the value to all 4 halfword slots
; ** LC = C*LdotN
; wmulum keeps the upper 16 bits of each unsigned halfword product.
wmulum wr7, wr7, wr13 ; Scale by LdotN
waddhus wr6, wr6, wr7 ; Accumulate in to Lights current diffuse
bl DirLightSpecular ; specular term; returns here and falls through to the accumulate
; ************* Accumulate to Output diffuse
HXFDIRECTIONALLIGHTPROC_CMS_ACCDIFFUSE
waddhus wr0, wr0, wr6 ; Accumulate diffuse contribution from this light
b NEXT_LIGHT
ENDP
|HXFPointLightProc| PROC
; Point light, plain variant: calls PointLight, then continues in Light.
; Both routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
b Light ; plain branch - control does not come back to this proc
ENDP
|HXFPointLightProc_CM| PROC
; Point light, _CM variant: calls PointLight, then continues in Light_CM.
; Both routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
b Light_CM ; plain branch - control does not come back to this proc
ENDP
|HXFPointLightProc_S| PROC
; Point light, _S variant: calls PointLight, then continues in Light_S.
; Both routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
b Light_S ; plain branch - control does not come back to this proc
ENDP
|HXFPointLightProc_CMS| PROC
; Point light, _CMS variant: calls PointLight, then continues in Light_CMS.
; PointLight is defined outside this view; only the tail of the Light_CMS
; code is visible in this part of the file.
bl PointLight ; overwrites r14 (link)
b Light_CMS ; plain branch - control does not come back to this proc
ENDP
|HXFSpotLightProc| PROC
; Spot light, plain variant: calls PointLight, then SpotEffect, then
; continues in Light. All three routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
bl SpotEffect ; overwrites r14 (link)
b Light ; plain branch - control does not come back to this proc
ENDP
|HXFSpotLightProc_CM| PROC
; Spot light, _CM variant: calls PointLight, then SpotEffect, then
; continues in Light_CM. All three routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
bl SpotEffect ; overwrites r14 (link)
b Light_CM ; plain branch - control does not come back to this proc
ENDP
|HXFSpotLightProc_S| PROC
; Spot light, _S variant: calls PointLight, then SpotEffect, then
; continues in Light_S. All three routines are defined outside this view.
bl PointLight ; overwrites r14 (link)
bl SpotEffect ; overwrites r14 (link)
b Light_S ; plain branch - control does not come back to this proc
ENDP
|HXFSpotLightProc_CMS| PROC
; Spot light, _CMS variant: calls PointLight, then SpotEffect, then
; continues in Light_CMS. PointLight and SpotEffect are defined outside this
; view; only the tail of the Light_CMS code is visible in this part of the file.
bl PointLight ; overwrites r14 (link)
bl SpotEffect ; overwrites r14 (link)
b Light_CMS ; plain branch - control does not come back to this proc
ENDP
;** ************************************************************************ **
; Name: Load*NormalProc
; Description: Load normals into HNormal format in wr2
; ---------------------------------------------------------------------- --
; Register Map - Light Preamble
; ---------------------------------------------------------------------- --
; r0 = pState   r4 =             r8  =          r12 = pOutVtx
; r1 = Flags    r5 =             r9  =          r13 = sp
; r2 =          r6 = Load Addr   r10 =          r14 = link
; r3 =          r7 =             r11 = pLight   r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D)    wr4 =   wr8  = VL           wr12 = 0xFFFF0000
; wr1 = out spec(S)    wr5 =   wr9  = Att(0.16)    wr13 =
; wr2 = vtx Norm(N)    wr6 =   wr10 =              wr14 = Zero
; wr3 = vtx Diff(vD)   wr7 =   wr11 = 0x04040404   wr15 =
; wcgr0 = 16   wcgr1 = 12   wcgr2 = 8   wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
|HXFLoadFIXEDNormalProc| PROC
; Load a normal stored as three consecutive 32-bit words at [r6] and pack it
; into wr2 as four halfwords.
; NOTE(review): the shift by 2 looks like a 16.16 -> 2.14 conversion - confirm
; the source format against the caller.
;
; Reads:    r6 (Load Addr), wr14 (Zero per the register map)
; Writes:   wr2 = { n.x, n.y, n.z, low halfword of wr14 } (halfword 0 first)
; Clobbers: r2, wr6, wr7, wr8, wr15
; Exit:     branches to LOAD_NORMAL_RETURN
wldrw wr6, [r6] ; load n.x
wldrw wr7, [r6, #4] ; load n.y
wldrw wr8, [r6, #8] ; load n.z
mov r2, #2 ; shift count
wzero wr15
tinsrw wr15, r2, #0 ; wr15 = 2 (shift amount for wsrad)
; Only the low halfword of each shifted value is used by the unpacks below.
wsrad wr6, wr6, wr15
wsrad wr7, wr7, wr15
wsrad wr8, wr8, wr15
; Interleave low halfwords: first x with z and y with wr14, then the two results.
wunpckilh wr6, wr6, wr8 ; wr6 low halfwords = x, z
wunpckilh wr7, wr7, wr14 ; wr7 low halfwords = y, wr14.h0
wunpckilh wr2, wr6, wr7 ; wr2 = x, y, z, wr14.h0
b LOAD_NORMAL_RETURN
ENDP
|HXFLoadINT8NormalProc| PROC
; Load a normal stored as three consecutive signed bytes at [r6], scale each
; component left by 7 bits and pack it into wr2 as four halfwords.
;
; Reads:    r6 (Load Addr), wr14 (Zero per the register map)
; Writes:   wr2 = { n.x, n.y, n.z, low halfword of wr14 } (halfword 0 first)
; Clobbers: r7, r8, r9, wr6, wr7, wr8
; Exit:     branches to LOAD_NORMAL_RETURN
ldrsb r7, [r6] ; load n.x
ldrsb r8, [r6, #1] ; load n.y
ldrsb r9, [r6, #2] ; load n.z
; Scale the sign-extended bytes up by 7 bits.
mov r7, r7, lsl #7
mov r8, r8, lsl #7
mov r9, r9, lsl #7
wzero wr6
wzero wr7
wzero wr8
; Move each component into word 0 of its own SIMD register.
tinsrw wr6, r7, #0
tinsrw wr7, r8, #0
tinsrw wr8, r9, #0
; Interleave low halfwords: first x with z and y with wr14, then the two results.
wunpckilh wr6, wr6, wr8 ; wr6 low halfwords = x, z
wunpckilh wr7, wr7, wr14 ; wr7 low halfwords = y, wr14.h0
wunpckilh wr2, wr6, wr7 ; wr2 = x, y, z, wr14.h0
b LOAD_NORMAL_RETURN
ENDP
|HXFLoadINT16NormalProc| PROC
; Load a normal stored as three consecutive signed halfwords at [r6], shift
; each component right by 1 bit (arithmetic) and pack it into wr2 as four
; halfwords.
;
; Reads:    r6 (Load Addr), wr14 (Zero per the register map)
; Writes:   wr2 = { n.x, n.y, n.z, low halfword of wr14 } (halfword 0 first)
; Clobbers: r7, r8, r9, wr6, wr7, wr8
; Exit:     branches to LOAD_NORMAL_RETURN
ldrsh r7, [r6] ; load n.x
ldrsh r8, [r6, #2] ; load n.y
ldrsh r9, [r6, #4] ; load n.z
; Halve the sign-extended components.
mov r7, r7, asr #1
mov r8, r8, asr #1
mov r9, r9, asr #1
wzero wr6
wzero wr7
wzero wr8
; Move each component into word 0 of its own SIMD register.
tinsrw wr6, r7, #0
tinsrw wr7, r8, #0
tinsrw wr8, r9, #0
; Interleave low halfwords: first x with z and y with wr14, then the two results.
wunpckilh wr6, wr6, wr8 ; wr6 low halfwords = x, z
wunpckilh wr7, wr7, wr14 ; wr7 low halfwords = y, wr14.h0
wunpckilh wr2, wr6, wr7 ; wr2 = x, y, z, wr14.h0
b LOAD_NORMAL_RETURN
ENDP
|HXFLoadPACKEDNormalProc| PROC
; Load an already-packed normal: copies the 64-bit doubleword at [r6]
; straight into wr2 with no conversion.
; NOTE(review): presumably the data is already in the four-halfword layout the
; other Load*NormalProc routines produce - confirm against the caller.
;
; Reads:    r6 (Load Addr)
; Writes:   wr2
; Exit:     branches to LOAD_NORMAL_RETURN
wldrd wr2, [r6]
b LOAD_NORMAL_RETURN
ENDP
;** ************************************************************************ **
; Name: NormalizeNormal
; Description:
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState   r4 = n.z         r8  =          r12 = pOutVtx
; r1 = Flags    r5 = nExp        r9  =          r13 = sp
; r2 = n.x      r6 = nSqr->Inv   r10 =          r14 = link
; r3 = n.y      r7 = NSqrExp     r11 = pLight   r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D)    wr4 =   wr8  = VL           wr12 = 0xFFFF0000
; wr1 = out spec(S)    wr5 =   wr9  = Att(0.16)    wr13 =
; wr2 = vtx Norm(N)    wr6 =   wr10 =              wr14 = Zero
; wr3 = vtx Diff(vD)   wr7 =   wr11 = 0x04040404   wr15 =
; wcgr0 = 16   wcgr1 = 12   wcgr2 = 8   wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
|NormalizeNormal| PROC
; *************
; Compute Length Squared
wzero wr15
tmia wr15, r2, r2
tmia wr15, r3, r3
tmia wr15, r4, r4
wslldg wr15, wr15, wcgr2 ; Partial normalization of DSqr
textrmsw r6, wr15, #1 ; r6 =
; *************
; Normalize NSqr Mantissa - must be positive as it is the sum of squares
clz r8, r6
rsbs r8, r8, #8
add r7, r8, r5, lsl #1 ; Compute NSqrExp = lPos.Exp*2 + (8-sclz(NormLen))+1(due to normalization)
add r7, r7, #1
rsblt r8, r8, #0
tinsrw wr13, r8, #0
wslldlt wr15, wr15, wr13
wsradgt wr15, wr15, wr13
textrmsw r6, wr15, #1 ; r6 =
HXF_INVSQRT r6, r7, r8, r9, r10
; *************
; Normalize the vector
wzero wr6
wzero wr7
wzero wr8
tmia wr6, r2, r6 ; N.x = InvN * X - r2 Free
add r9, r5, r7 ; nshift = 32 -(nExp + InvLExp);
tmia wr7, r3, r6 ; N.y = InvN * Y - r3 Free
rsb r9, r9, #32
tmia wr8, r4, r6 ; N.z = InvN * Z - r4, r7 Free
wzero wr15
tinsrw wr15, r9, #0 ; r9 - Free
wsrad wr6, wr6, wr15 ; Convert to 16 bits 2.14(?)
wsrad wr7, wr7, wr15
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -