📄 hxflightarm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 5 页
字号:
	wldrd	wr15, [r0, #HXFSTATE_OFFSET_EYEVECTOR]
	mov		r3, #2
	wzero wr13
	tinsrb	wr13, r3, #0
	wsrah	wr8,  wr8,  wr13	; convert to 4.12
	wsrah	wr15, wr15, wr13	; convert to 4.12
	waddh	wr8,  wr8,  wr15  ; S = Sum VL and the EyeVector

	; Normalize S
	wmacsz	wr13, wr8, wr8 ; Length^2(S) -> 8.24 format 
	textrmsw r5, wr13, #0		; result 8.24 fixed point
	
	cmp		r5, #0				; No length ; FIXME put more agressive length test in place
	moveq	pc, r10				; vectors oppose each other

	; must normalize to float - Length^2(S) to 24 bit mantissa
	clz 	r6, r5
	rsbs 	r6, r6, #8		; compute the initial exponent
	rsblt 	r3, r6, #0
	movlt 	r5, r5, lsl r3
	movgt 	r5, r5, asr r6
	sub		r6, r6, #1		; Compute the proper exponent - decimal moved one position

	HXF_INVSQRT r5, r6, r8, r9, r4 

	;  Scale S by InvLength(4.12)
	rsbs    r2,	r6, #11		; compute shift to convert to 4.12 (11(decimal shift))
	movge 	r5, r5, asr r2
	rsblt	r2, r2, #0
	movlt 	r5, r5, lsl r2

	tbcsth 	wr15, r5			; Send InvLen to wr15
	wmulsm 	wr8, wr8, wr15		; Scale S by InvLen
	mov 	r5, #4
	wzero wr15
	tinsrw wr15, r5, #0
	wsllh 	wr8, wr8, wr15		; Convert to 4.12

	; Compute SDotN
	wmacsz	 wr13, wr8, wr2 ; SdotN(6.26) = (S(4.12), vNormal(2.14) 
 	textrmsw r2, wr13, #0	; Pull out dot product

	movs 	r2, r2, asr #10  ; convert spoteffect to 16.16
	movle	pc, r10	; Check for <= Zero -- if SdotN <= Zero then no specular contibution
				; lr preserverd in r10
 	ldr		r3, [r0, #HXFSTATE_OFFSET_MATERIALPOWER]
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_SPECULAR]

	cmp		r2, #HFX_ONE		
	bge 	LIGHTSPECULAR_SPEC_POWER

	; Apply Shininess
	cmp 	r3, #HFX_ONE		; x^1 = x - by pass power if possible
	ble 	LIGHTSPECULAR_SPEC_POWER
	
; XXX FIXME Avoid the stack
	stmfd 	sp!, {r0-r1}	; FIXME Avoid the stack. 
	mov		r0, r2	
	mov		r1, r3			;
	bl 		Pow
	mov 	r2, r0
	ldmfd 	sp!, {r0-r1}
	
LIGHTSPECULAR_SPEC_POWER

	cmp 	r2, #HFX_ONE
	mvnge	r2, #0
	tbcsth	wr13, r2 	       ; send specular scale -- to wmmx

	wmulum	wr7, wr7, wr13     ; Scale by SpecScale	

	; Compute attenuated value and saturate it. 
	wmulum	wr15, wr7, wr9  ; Scale by Att
	wmulul	wr7, wr7, wr9  ; Scale by Att

	wunpckilh wr13, wr7, wr15
	wunpckihh wr15, wr7, wr15
	
	wsrlw wr15, wr15, wr10
	wsrlw wr13, wr13, wr10

	wpackwus wr7, wr13, wr15

	waddhus wr1, wr1, wr7      ; Accumulate specular contribution from this light

	mov		pc, r10		
	ENDP	

;** ************************************************************************ **
; Name:				DirLightSpecular
; Description: 		Same as LightSpecular, but the specular colour is only
;					scaled by the specular factor (SdotN, optionally raised
;					to the material power); the attenuation multiply/shift
;					by wr9/wr10 that LightSpecular performs is omitted.
;
; Inputs (see register map): r0 = pState, r11 = pLight, wr2 = vertex normal,
;					wr8 = VL, wr1 = specular accumulator, lr = return address.
; Output:			wr1 += specular contribution (unsigned saturating add).
;					Returns early, leaving wr1 untouched, when |S|^2 == 0 or
;					SdotN <= 0.
; Clobbers:			r2-r6, r10, flags, wr7, wr8, wr13, wr15, plus whatever
;					HXF_INVSQRT and Pow clobber (both defined outside this
;					view; HXF_INVSQRT is additionally handed r8, r9 and r4).
; ---------------------------------------------------------------------- --
; Register Map - Light Preamble
; ---------------------------------------------------------------------- --
; r0 =	pState      r4 =            r8 =            r12 = pOutVtx        
; r1 =  Flags       r5 =			r9 =            r13 = sp
; r2 =              r6 =			r10 = (link)    r14 = link
; r3 =              r7 =		    r11 = pLight    r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D) wr4 =            wr8 = VL         wr12 = 0xFFFF0000         
; wr1 = out spec(S) wr5 =            wr9 = Att(0.16)  wr13 =   
; wr2 = vtx Norm(N) wr6 =            wr10 = Att Shift wr14 = Zero 
; wr3 = vtx Diff(vD)wr7 =             wr11 = 0x04040404wr15 =     
; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
|DirLightSpecular| PROC
	mov		r10, lr		; keep the return address in r10: the bl to Pow below overwrites lr

	; Compute S = VL + EyeVector (the un-normalized half vector)
	wldrd	wr15, [r0, #HXFSTATE_OFFSET_EYEVECTOR]
	mov		r3, #2
	wzero wr13
	tinsrb	wr13, r3, #0		; wr13 = shift count 2
	wsrah	wr8,  wr8,  wr13	; convert to 4.12
	wsrah	wr15, wr15, wr13	; convert to 4.12
	waddh	wr8,  wr8,  wr15  ; S = Sum VL and the EyeVector

	; Normalize S
	wmacsz	wr13, wr8, wr8 ; Length^2(S) -> 8.24 format 
	textrmsw r5, wr13, #0		; result 8.24 fixed point
	
	cmp		r5, #0				; No length ; FIXME put a more aggressive length test in place
	moveq	pc, r10				; vectors oppose each other - return, no specular

	; must normalize to float - Length^2(S) to 24 bit mantissa
	clz 	r6, r5
	rsbs 	r6, r6, #8		; compute the initial exponent (8 - leading zero count)
	rsblt 	r3, r6, #0		; exponent < 0: r3 = left shift needed to fill the mantissa
	movlt 	r5, r5, lsl r3
	movgt 	r5, r5, asr r6	; exponent > 0: shift the value down into the mantissa
	sub		r6, r6, #1		; Compute the proper exponent - decimal moved one position

	; NOTE(review): HXF_INVSQRT is a macro defined outside this view. From its use
	; below it presumably leaves the inverse-sqrt mantissa in r5 and its exponent in
	; r6, with r8, r9 and r4 as scratch - confirm against the macro definition.
	HXF_INVSQRT r5, r6, r8, r9, r4 

	;  Scale S by InvLength(4.12)
	rsbs    r2,	r6, #11		; compute shift to convert to 4.12 (11(decimal shift))
	movge 	r5, r5, asr r2	; shift >= 0: shift right
	rsblt	r2, r2, #0		; shift < 0: negate and shift left instead
	movlt 	r5, r5, lsl r2

	tbcsth 	wr15, r5			; Send InvLen to wr15 (broadcast to all four halfwords)
	wmulsm 	wr8, wr8, wr15		; Scale S by InvLen (keeps the upper 16 bits of each product)
	mov 	r5, #4
	wzero wr15
	tinsrw wr15, r5, #0			; wr15 = shift count 4
	wsllh 	wr8, wr8, wr15		; Convert to 4.12

	; Compute SDotN
	wmacsz	 wr13, wr8, wr2 ; SdotN(6.26) = (S(4.12), vNormal(2.14) 
 	textrmsw r2, wr13, #0	; Pull out dot product

	movs 	r2, r2, asr #10  ; convert SdotN from 6.26 to 16.16
	movle	pc, r10	; Check for <= Zero -- if SdotN <= Zero then no specular contribution
				; lr preserved in r10
 	ldr		r3, [r0, #HXFSTATE_OFFSET_MATERIALPOWER]
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_SPECULAR]

	cmp		r2, #HFX_ONE		; SdotN >= 1.0: skip the power, the value is clamped below
	bge 	DIR_LIGHTSPECULAR_SPEC_POWER

	; Apply Shininess
	cmp 	r3, #HFX_ONE		; x^1 = x - bypass the power when the exponent is <= 1.0
	ble 	DIR_LIGHTSPECULAR_SPEC_POWER
	
; XXX FIXME Avoid the stack
; Pow is called with r0 = SdotN and r1 = material power; its result is read from r0.
; NOTE(review): only r0-r1 are saved across the call. The register map lists
; r12 = pOutVtx, and r12 is not callee-saved under the ARM procedure call
; standard - confirm that Pow (defined elsewhere) preserves it.
	stmfd 	sp!, {r0-r1}	; FIXME Avoid the stack. 
	mov		r0, r2	
	mov		r1, r3			;
	bl 		Pow
	mov 	r2, r0
	ldmfd 	sp!, {r0-r1}
	
DIR_LIGHTSPECULAR_SPEC_POWER

	cmp 	r2, #HFX_ONE
	mvnge	r2, #0			; clamp: >= 1.0 becomes all ones, so the low halfword is 0xFFFF
	tbcsth	wr13, r2 	       ; send specular scale (low 16 bits of r2) -- to wmmx

	wmulum	wr7, wr13, wr7     ; Scale light specular by SpecScale only (no attenuation here)	

	waddhus wr1, wr1, wr7      ; Accumulate specular contribution from this light

	mov		pc, r10		; return through the saved link
	ENDP

;** ************************************************************************ **
; Name:				Light_*
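; Description: 		Per-light ambient/diffuse accumulation routines (Light, Light_S,
;					Light_CM, Light_CMS). Light_S additionally calls LightSpecular.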
; ---------------------------------------------------------------------- --
; Register Map - Light Preamble
; ---------------------------------------------------------------------- --
; r0 =	pState      r4 =            r8 =            r12 = pOutVtx        
; r1 =  Flags       r5 =		    r9 =            r13 = sp
; r2 =              r6 =			r10 =           r14 = link
; r3 =              r7 =		    r11 = pLight    r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = out diff(D) wr4 =            wr8 = VL         wr12 = 0xFFFF0000         
; wr1 = out spec(S) wr5 =            wr9 = Att(0.16)  wr13 =   
; wr2 = vtx Norm(N) wr6 =            wr10 = Att Shift wr14 = Zero 
; wr3 = vtx Diff(vD)wr7 =             wr11 = 0x04040404wr15 =     
; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
; ---------------------------------------------------------------------- --
; Light - ambient + diffuse contribution of one light, no specular.
;   In:   r11 = pLight, wr2 = vertex normal, wr3 = vertex colour,
;         wr8 = VL, wr9 = Att (0.16), wr10 = Att shift,
;         wr12 = 0xFFFF0000, wcgr0 = 16, wcgr1 = 12
;   Out:  wr0 += contribution of this light (unsigned saturating add)
;   Uses: wr6, wr7, wr13, wr15, CPSR flags
;   Exit: branches to NEXT_LIGHT (does not return through lr)
; ---------------------------------------------------------------------- --
|Light| PROC
	; ************* Compute Light Diffuse Contributions
	; Available - r2-r9, r12, r14
	wldrd	wr13, [r11, #HXFLIGHT_OFFSET_AMBIENT]
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]

	; ************* Compute Light Ambient Contribution
	wmulum	wr6, wr3, wr13 ;  wr6 = Light Ambient * vertex colour (wr3)

	; ************* Compute Light Diffuse Contribution
	wmacsz	wr13, wr8, wr2 ; LdotN = VL dot N 
	; The dot product of two 2.14 vectors is 4.28; shifting right by 12
	; leaves 16.16 in the word, i.e. the low halfword is the 0.16 fraction.
	wsrawg	wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12

	textrcw r15, #0			; Check for LdotN <= Zero -> max(LdotN, 0)
	; NOTE(review): on this path wr6 (ambient only) is accumulated WITHOUT the
	; attenuation applied below - confirm that is intended.
	ble LIGHT_ACCDIFFUSE	; if LdotN is <= 0 then skip remaining calcs

	; Any bit set above the 0.16 fraction means LdotN >= 1.0.
	; NOTE(review): textrcw #1 presumably picks up the flags WAND reports for
	; its 64-bit result - confirm against the Wireless MMX manual.
	wand wr15, wr13, wr12	; wr13 AND 0xFFFF0000 test for >= 1.0
	textrcw r15, #1
	wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0 (0xFFFF0000 >> 16 = 0x0000FFFF)

	wshufh	wr13, wr13, #00 ; replicate the value to all 4 halfword slots

	; ** LC = Light*Mat
	wmulum	wr7, wr3, wr7	 ;  Light Diffuse * Vertex Diffuse

	; ** LC = C*LdotN
	wmulum	wr7, wr7, wr13  ; Scale by LdotN

	waddhus	wr6, wr6, wr7	 ; Accumulate in to Lights current diffuse

	; ** LC = C*Att
	; Compute attenuated value and saturate it.
	; Both halves of the 16x16 product must be taken from the SAME source
	; value, so the low half goes into wr7 (dead after the add above) BEFORE
	; wr6 is overwritten with the high half.
	wmulul	wr7, wr6, wr9  ; wr7 = low  16 bits of (C * Att)
	wmulum	wr6, wr6, wr9  ; wr6 = high 16 bits of (C * Att)

	; Interleave low/high halves back into four 32-bit products.
	wunpckihh wr15, wr7, wr6
	wunpckilh wr13, wr7, wr6
	
	wsrlw wr15, wr15, wr10	; apply the attenuation shift
	wsrlw wr13, wr13, wr10

	wpackwus wr6, wr13, wr15	; pack back to 4 halfwords with unsigned saturation

	; ************* Accumulate to Output diffuse			
LIGHT_ACCDIFFUSE
	waddhus wr0, wr0, wr6  ; Accumulate diffuse contribution from this light

	b NEXT_LIGHT 
	ENDP

; ---------------------------------------------------------------------- --
; Light_S - ambient + diffuse contribution of one light, then specular
;           via LightSpecular.
;   In:   r11 = pLight, wr2 = vertex normal, wr3 = vertex colour,
;         wr8 = VL, wr9 = Att (0.16), wr10 = Att shift,
;         wr12 = 0xFFFF0000, wcgr0 = 16, wcgr1 = 12
;   Out:  wr0 += diffuse/ambient contribution (unsigned saturating add);
;         LightSpecular accumulates the specular part into wr1
;   Uses: wr6, wr7, wr13, wr15, lr, CPSR flags, plus whatever LightSpecular
;         uses (its start is outside this view; wr6 must survive the call)
;   Exit: branches to NEXT_LIGHT (does not return through lr)
; ---------------------------------------------------------------------- --
|Light_S| PROC
	; ************* Compute Light Diffuse Contributions
	; Available - r2-r9, r12, r14
	wldrd	wr13, [r11, #HXFLIGHT_OFFSET_AMBIENT]
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]

	; ************* Compute Light Ambient Contribution
	wmulum	wr6, wr3, wr13 ;  wr6 = Light Ambient * vertex colour (wr3)

	; ************* Compute Light Diffuse Contribution
	wmacsz	wr13, wr8, wr2 ; LdotN = VL dot N 
	; The dot product of two 2.14 vectors is 4.28; shifting right by 12
	; leaves 16.16 in the word, i.e. the low halfword is the 0.16 fraction.
	wsrawg	wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12

	textrcw r15, #0			; Check for LdotN <= Zero -> max(LdotN, 0)
	; Skips the specular call as well. NOTE(review): on this path wr6
	; (ambient only) is accumulated WITHOUT attenuation - confirm intended.
	ble LIGHT_S_ACCDIFFUSE	; if LdotN is <= 0 then skip remaining calcs

	; Any bit set above the 0.16 fraction means LdotN >= 1.0.
	; NOTE(review): textrcw #1 presumably picks up the flags WAND reports for
	; its 64-bit result - confirm against the Wireless MMX manual.
	wand wr15, wr13, wr12	; wr13 AND 0xFFFF0000 test for >= 1.0
	textrcw r15, #1
	wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0 (0xFFFF0000 >> 16 = 0x0000FFFF)

	wshufh	wr13, wr13, #00 ; replicate the value to all 4 halfword slots

	; ** LC = Light*Mat
	wmulum	wr7, wr3, wr7	 ;  Light Diffuse * Vertex Diffuse
	; ** LC = C*LdotN
	wmulum	wr7, wr7, wr13  ; Scale by LdotN
	waddhus	wr6, wr6, wr7	 ; Accumulate in to Lights current diffuse

	; ** LC = C*Att
	; Compute attenuated value and saturate it.
	; Both halves of the 16x16 product must be taken from the SAME source
	; value, so the low half goes into wr7 (dead after the add above) BEFORE
	; wr6 is overwritten with the high half.
	wmulul	wr7, wr6, wr9  ; wr7 = low  16 bits of (C * Att)
	wmulum	wr6, wr6, wr9  ; wr6 = high 16 bits of (C * Att)

	; Interleave low/high halves back into four 32-bit products.
	wunpckihh wr15, wr7, wr6
	wunpckilh wr13, wr7, wr6
	
	wsrlw wr15, wr15, wr10	; apply the attenuation shift
	wsrlw wr13, wr13, wr10

	wpackwus wr6, wr13, wr15	; pack back to 4 halfwords with unsigned saturation

	; bl overwrites lr; that is fine because this routine exits via NEXT_LIGHT.
	bl LightSpecular	;	
	; ************* Accumulate to Output diffuse			
LIGHT_S_ACCDIFFUSE
	waddhus wr0, wr0, wr6  ; Accumulate diffuse contribution from this light

	b NEXT_LIGHT 
	ENDP

; ---------------------------------------------------------------------- --
; Light_CM - ambient + diffuse contribution of one light, using the light's
;            ambient/diffuse values directly (no multiply by wr3), no specular.
;   In:   r11 = pLight, wr2 = vertex normal, wr8 = VL,
;         wr9 = Att (0.16), wr10 = Att shift,
;         wr12 = 0xFFFF0000, wcgr0 = 16, wcgr1 = 12
;   Out:  wr0 += contribution of this light (unsigned saturating add)
;   Uses: wr6, wr7, wr13, wr15, CPSR flags
;   Exit: branches to NEXT_LIGHT (does not return through lr)
; ---------------------------------------------------------------------- --
|Light_CM| PROC
	; ************* Compute Light Diffuse Contributions
	; Available - r2-r9          
	wldrd	wr6, [r11, #HXFLIGHT_OFFSET_AMBIENT] ; Initialize light diffuse with light ambient
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]

	; ************* Compute Light Diffuse Contribution
 	wmacsz	wr13, wr8, wr2 ; LdotN = VL dot N 

	; The dot product of two 2.14 vectors is 4.28; shifting right by 12
	; leaves 16.16 in the word, i.e. the low halfword is the 0.16 fraction.
	wsrawg	wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12

	textrcw r15, #0			; Check for LdotN <= Zero -> max(LdotN, 0)
	; NOTE(review): on this path wr6 (ambient only) is accumulated WITHOUT the
	; attenuation applied below - confirm that is intended.
	ble LIGHT_CM_ACCDIFFUSE	; if LdotN is <= 0 then skip remaining calcs

	; Any bit set above the 0.16 fraction means LdotN >= 1.0.
	; NOTE(review): textrcw #1 presumably picks up the flags WAND reports for
	; its 64-bit result - confirm against the Wireless MMX manual.
	wand wr15, wr13, wr12	; wr13 AND 0xFFFF0000 test for >= 1.0
	textrcw r15, #1
	wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0 (0xFFFF0000 >> 16 = 0x0000FFFF)

	wshufh	wr13, wr13, #00 ; replicate the value to all 4 halfword slots
	
	; ** LC = C*LdotN
	wmulum	wr7, wr7, wr13  ; Scale by LdotN
	waddhus	wr6, wr6, wr7	 ; Accumulate in to Lights current diffuse

	; ** LC = C*Att
	; Compute attenuated value and saturate it.
	; Both halves of the 16x16 product must be taken from the SAME source
	; value, so the low half goes into wr7 (dead after the add above) BEFORE
	; wr6 is overwritten with the high half.
	wmulul	wr7, wr6, wr9  ; wr7 = low  16 bits of (C * Att)
	wmulum	wr6, wr6, wr9  ; wr6 = high 16 bits of (C * Att)

	; Interleave low/high halves back into four 32-bit products.
	wunpckihh wr15, wr7, wr6
	wunpckilh wr13, wr7, wr6	

	wsrlw wr15, wr15, wr10	; apply the attenuation shift
	wsrlw wr13, wr13, wr10
	
	wpackwus wr6, wr13, wr15	; pack back to 4 halfwords with unsigned saturation

	; ************* Accumulate to Output diffuse			
LIGHT_CM_ACCDIFFUSE
	waddhus wr0, wr0, wr6  ; Accumulate diffuse contribution from this light

	b NEXT_LIGHT 
	ENDP

|Light_CMS| PROC
	; ************* Compute Light Diffuse Contributions
	; Availiable - r2-r9          
	wldrd	wr6, [r11, #HXFLIGHT_OFFSET_AMBIENT] ; Initialize light diffuse with light ambient
	wldrd	wr7, [r11, #HXFLIGHT_OFFSET_DIFFUSE]

	; ************* Compute Light Diffuse Contribution
	wmacsz	wr13, wr8, wr2 ; LdotN = VL dot N 
	; Convert LdotN to 0.16 and pack a full register with 16 bit values
	; currently in a 4.28 format from dp of 2.14*2.14->4.28 
	wsrawg	wr13, wr13, wcgr1 ; convert to 0.16 - shift right by 12

	textrcw r15, #0			; Check for LdotN <= Zero -> max(LdotN, 0)
	ble LIGHT_CMS_ACCDIFFUSE	; if LdotN is <= 0 then skip remaining calcs

	wand wr15, wr13, wr12	; wr13 AND 0xFFFF0000 test for >= 1.0
	textrcw r15, #1
	wsrlwgne wr13, wr12, wcgr0 ; Saturate > 1.0

	wshufh	wr13, wr13, #00 ; replicate the value to all 4 halfword slots

	; ** LC = C*LdotN
	wmulum	wr7, wr7, wr13  ; Scale by LdotN
	; ** LC = C*Att
	waddhus	wr6, wr6, wr7	 ; Accumulate in to Lights current diffuse
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -