⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hxftransformarm.s

📁 Lido PXA270平台开发板的最新BSP,包括源代码
💻 S
📖 第 1 页 / 共 4 页
字号:

	; normalize HSpace values
    wsradg  wr0, wr0, wcgr0
    wsradg  wr1, wr1, wcgr0
    wsradg  wr2, wr2, wcgr0
    wsradg  wr3, wr3, wcgr0

	; ---------------------------------------------------------------------- --
	; Register Map - Store Clip positions
	; ---------------------------------------------------------------------- --
	; r0 =	pState      r4 =            r8 = r.w        r12 = pOutVtx        
	; r1 =  flags       r5 = r.x        r9 =            r13 = sp
	; r2 =              r6 = r.y        r10 =           r14 = 
	; r3 =              r7 = r.z        r11 = 		    r15 = pc
	; ---------------------------------------------------------------------- --
	; wr0 = v.w         wr4 =            wr8 =            wr12 = 0xFFFF0000         
	; wr1 = v.z         wr5 =            wr9 =            wr13 =  zExt 
	; wr2 = v.y         wr6 =            wr10 =           wr14 = Zero 
	; wr3 = v.x         wr7 =            wr11 = 0x04040404wr15 =   
	; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
	; ---------------------------------------------------------------------- --
    ; Take all high word results and place them in ARM registers. TMRRC <Rlo, Rhi, wRn>.
	ldr	r2, [r0, #HXFSTATE_OFFSET_OUTCLIPPOSOFFSET] ;

	; extract values for multiply
    textrmsw r5, wr0, #0     ; r.x
    textrmsw r6, wr1, #0     ; r.y
    textrmsw r7, wr2, #0     ; r.z
    textrmsw r8, wr3, #0     ; r.w

	; we must store the clip space vertex data for clipper.  	
	add r2, r12, r2	; calculate the output offset

	; Store the H-Space Coordinates for Clipping
	str  r5,   [r2]
	str  r6,   [r2, #4]
	str  r7,   [r2, #8]
	str  r8,   [r2, #12]

	; ---------------------------------------------------------------------- --
	; Generate ClipFlags 
	; ClipFlags is used to determine if a supplied point lies inside
	; the clip box extents
	mov 	r10, #0	  ; clear the r10 register to hold the clip flags
	; load the clip-flag output pointer so the flags can be stored below
	ldr 	r9, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]
	
	; compute x flags
	; x<0
	cmp r5, #0
	orrlt 	r10, r10, #HXF_VTX_CLIP_FLAG_NEG_X	

	; w-x<0
	cmp r5, r8
	orrgt 	r10, r10, #HXF_VTX_CLIP_FLAG_POS_X	

	; compute y flags
	; y<0
	cmp r6, #0
	orrlt 	r10, r10, #HXF_VTX_CLIP_FLAG_NEG_Y	

	; w-y<0
	cmp r6, r8
	orrgt 	r10, r10, #HXF_VTX_CLIP_FLAG_POS_Y	

	; compute z flags	
	; z<0
	cmp r7, #0
	orrlt 	r10, r10, #HXF_VTX_CLIP_FLAG_NEG_Z

	; w-z<0
	cmp r7, r8
	orrgt 	r10, r10, #HXF_VTX_CLIP_FLAG_POS_Z	

	; store the clip flag
	strb 	r10, [r9]				; store clipFlags

	; prefetch 4 vertices ahead
	pld 	[r9, #HXF_TNL_VTX_PREFETCH_DISTANCE<<1]

	; update the clipflag pointer
	add 	r9, r9, #1
	str 	r9, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]
		
	; If the vertex violates any clip boundary (any flag set), do no further processing.
	cmp r10, #0
	bne HTV_TNL_RETURN
	
	; ---------------------------------------------------------------------- --
	; Register Map - Divide by W
	; ---------------------------------------------------------------------- --
	; r0 =	pState      r4 =            r8 = r.w        r12 = pOutVtx        
	; r1 =  flags       r5 = r.x        r9 =            r13 = sp
	; r2 =              r6 = r.y        r10 =           r14 = W Exp
	; r3 =              r7 = r.z        r11 = 		    r15 = pc
	; ---------------------------------------------------------------------- --
	; wr0 = v.w         wr4 =            wr8 =            wr12 = 0xFFFF0000         
	; wr1 = v.z         wr5 =            wr9 =            wr13 = zExt  
	; wr2 = v.y         wr6 =            wr10 =           wr14 = Zero 
	; wr3 = v.x         wr7 =            wr11 = 0x04040404wr15 =   
	; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
	; ---------------------------------------------------------------------- --
W_DIVIDE	; registers r9-r11, r14 free 
	; Calculate inverse W -- r8 = w -> r8 = 1/w
	
	cmp r8, #HFX_ONE; 
	beq VIEWPORT_TRANSFORM

	; Check for division by 0. 
	cmp r8, #0; 
	beq HTV_TNL_RETURN

	; Normalize w
	movge   r10, r8
	rsblt   r10, r8, #0

	clz		r14, r10
	mov		r11, r10, lsl r14 ; setup denominator for the divide

    ; Clear out the result registers so we can mult & acc 32-bit values into them.
	mov r10, #0          ; Clear the result register
	mov r9, #0x80000000	 ; set up for the divide
	
	; Do the division computation
	HXF_ONEBITDIVIDE 31, r9, r11,  r10	; 1
	HXF_ONEBITDIVIDE 30, r9, r11,  r10	; 2
    HXF_ONEBITDIVIDE 29, r9, r11,  r10 	; 3
	HXF_ONEBITDIVIDE 28, r9, r11,  r10  ; 4
	
	HXF_ONEBITDIVIDE 27, r9, r11,  r10	; 5 
	HXF_ONEBITDIVIDE 26, r9, r11,  r10	; 6  	
	HXF_ONEBITDIVIDE 25, r9, r11,  r10	; 7  
	HXF_ONEBITDIVIDE 24, r9, r11,  r10	; 8  
	
	HXF_ONEBITDIVIDE 23, r9, r11,  r10	; 9  
	HXF_ONEBITDIVIDE 22, r9, r11,  r10	; 10  
	HXF_ONEBITDIVIDE 21, r9, r11,  r10	; 11  
	HXF_ONEBITDIVIDE 20, r9, r11,  r10	; 12  
	
	HXF_ONEBITDIVIDE 19, r9, r11,  r10	; 13  
	HXF_ONEBITDIVIDE 18, r9, r11,  r10	; 14  	
	HXF_ONEBITDIVIDE 17, r9, r11,  r10	; 15  
	HXF_ONEBITDIVIDE 16, r9, r11,  r10	; 16  

	; convert back to Fixed point	-- lz in r14	
	;	ResultExp = 0 - 15 - lz(w)
	;   Result Shift to FX = 7 - ResultExp
	;   Result Shift to FX = 7 - 0 - 15 - lz(w)
	;   Result Shift to FX = 22 -lz(w)

	rsb r14, r14, #22
	mov r8, r10, lsr r14

	; Restore the proper sign to the w result.
 	cmp r8, #0
	rsblt r10, r10, #0;

	mov r10, #24
	tinsrw wr15, r10, #0	; Setup for the Shift convert 24.40 to 16.16

	; Multiply the vector components by the inverse w.
    wzero wr0
	tmia wr0, r5, r8		; r.x * 1/w
	wzero wr1
	tmia wr1, r6, r8		; r.y * 1/w
    wzero wr2
	tmia 	wr2, r7, r8		; r.z * 1/w

    wsrad  wr0, wr0, wr15
    wsrad  wr1, wr1, wr15
	wsrad  wr2, wr2, wr15
	mov r8, r8, lsr #8 ; Convert 1/w from 8.24 to 16.16

    ; Pack the results into two registers use signed saturation to clamp large 
    ; numbers.
    wpackdss wr0, wr0, wr1
    wpackdss wr2, wr2, wr14

    ; Move the results back into the appropriate ARM destination registers.
    tmrrc   r5, r6, wr0
    textrmsw r7, wr2, #0    

	; ---------------------------------------------------------------------- --
	; Register Map - Viewport Transform
	; ---------------------------------------------------------------------- --
	; r0 =	pState      r4 =            r8 = w          r12 = pOutVtx        
	; r1 =  flags       r5 =            r9 =            r13 = sp
	; r2 =              r6 =            r10 =           r14 = wExp 
	; r3 =              r7 =            r11 = 		    r15 = pc
	; ---------------------------------------------------------------------- --
	; wr0 = v.x         wr4 =            wr8 =            wr12 = 0xFFFF0000         
	; wr1 = v.y         wr5 =            wr9 =            wr13 =  zExt 
	; wr2 = v.z         wr6 =            wr10 =           wr14 = Zero 
	; wr3 = v.w         wr7 =            wr11 = 0x04040404wr15 =   
	; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
	; ---------------------------------------------------------------------- --
VIEWPORT_TRANSFORM	

	; Viewport transform	
	ldrd    r2, [r0, #HXFSTATE_OFFSET_VIEWPORT_XS]        ; Load VPXScale & VPYScale

	wldrw	wr0, [r0, #HXFSTATE_OFFSET_VIEWPORT_XT]        ; Load VPXTrans
	wldrw	wr1, [r0, #HXFSTATE_OFFSET_VIEWPORT_YT]        ; Load VPYTrans
	wldrw	wr2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZT]        ; Load VPZTrans

	wslldg	wr0, wr0, wcgr0
	tmia	wr0, r2, r5
	wslldg	wr1, wr1, wcgr0
	ldr    r2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZS]        ; Load VP ZScale 
	tmia	wr1, r3, r6
	wslldg	wr2, wr2, wcgr0  
	tmia	wr2, r2, r7

	wsradg	wr0, wr0, wcgr0
	wsradg	wr1, wr1, wcgr0
	wsradg	wr2, wr2, wcgr0
	
	; Store the output vertices - Only in Ortho case do we need to store verts
	wstrw wr0, [r12]
	wstrw wr1, [r12, #4]
	wstrw wr2, [r12, #8]
	str   r8, [r12, #12]

	b 		HTV_TNL_RETURN
	ENDP

;** ************************************************************************ **
;**	Copy Colors Procs
;** ************************************************************************ **
; ---------------------------------------------------------------------- --
; Register Map - Copy Diffuse Procs
; ---------------------------------------------------------------------- --
; r0 =	pState      r4 =            r8 =            r12 = pOutVtx        
; r1 =  flags       r5 =            r9 =            r13 = sp
; r2 =  pOutDif     r6 =            r10 =           r14 = 
; r3 =              r7 =            r11 = 		    r15 = pc
; ---------------------------------------------------------------------- --
; wr0 =             wr4 =            wr8 =            wr12 = 0xFFFF0000         
; wr1 =             wr5 =            wr9 =            wr13 =   
; wr2 =             wr6 =            wr10 =           wr14 = Zero 
; wr3 =             wr7 =            wr11 = 0x04040404wr15 =   
; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
; ---------------------------------------------------------------------- --
; HXFCopyPACKEDColorProc
;   Reads four 16-bit colour channels (one 64-bit load) from [r6], reduces
;   each channel to a byte and writes the four bytes as one 32-bit word to
;   [r2].
;   In:       r6 = address of the source colour (8 bytes are read)
;             r2 = address of the output colour word (4 bytes are written)
;   Relies on wcgr2 = 8 and wr14 = Zero, as listed in the register map above.
;   Clobbers: wr15
;   Returns with "mov pc, lr".
;   NOTE(review): no halfword shuffle is done here, unlike
;   HXFCopyUINT8ColorProc, so the byte order of the output word equals the
;   channel order in memory at [r6] - confirm this matches the HColor4S layout.
; ---------------------------------------------------------------------- --
|HXFCopyPACKEDColorProc| PROC
	; INT16 colors are only ever from HColor4S structures which are default colors 
	; set into material
	wldrd 	wr15, [r6]		; wr15 = four 16-bit channels

	wsrlhg 	wr15, wr15, wcgr2 ; Logical shift of every halfword right by wcgr2 (8 per
							  ; the register map): keeps the top 8 bits of each channel
	wpackhus wr15, wr15, wr14	; halfwords -> bytes (unsigned saturation); low word from wr15, high word from wr14
	wstrw 	wr15, [r2]		; store the low 32 bits (the four packed channel bytes)

	mov pc, lr
	ENDP
	
; ---------------------------------------------------------------------- --
; HXFCopyUINT8ColorProc
;   Reads one 32-bit colour word (four 8-bit channels) from [r6], swaps the
;   channels held in byte 0 and byte 2, and writes the resulting word to [r2].
;   In:       r6 = address of the source colour (4 bytes are read)
;             r2 = address of the output colour word (4 bytes are written)
;   Relies on wr14 = Zero, as listed in the register map above.
;   Clobbers: wr15
;   Returns with "mov pc, lr".
; ---------------------------------------------------------------------- --
|HXFCopyUINT8ColorProc| PROC
	wldrw 	wr15, [r6]			; HXF_VF_UINT8 
	wunpckelub wr15, wr15		; zero-extend the four low bytes into four halfwords

	wshufh	wr15, wr15, #0xC6 ; In little-endian mode, bytes stored in memory as
							  ; R,G,B,A read back as ABGR in the register (A in the
							  ; top lane). The destination wants ARGB in the register,
							  ; so halfword lanes 0 and 2 are swapped:
							  ; ABGR(11 10 01 00)  to ARGB (11 00 01 10)
	wpackhus	wr15, wr15, wr14	; halfwords -> bytes; lanes are 0..255 so saturation changes nothing
	wstrw		wr15, [r2]		; store the low 32 bits (the re-ordered colour word)
	
	mov pc, lr
	ENDP
	
; ---------------------------------------------------------------------- --
; HXFCopyFIXEDColorProc
;   Loads four 32-bit colour channels from [r6] (r at +0, g at +4, b at +8,
;   a at +12), runs each through HXF_FIXED_TO_COLORBYTE, combines them as
;   (a << 24) | (r << 16) | (g << 8) | b and stores that word to [r2].
;   In:       r6 = address of the source colour (16 bytes are read)
;             r2 = address of the output colour word (4 bytes are written)
;   Clobbers: r3, r4, r5, r8, r10, plus r7, which is handed to the macro
;   Returns with "mov pc, lr".
;   NOTE(review): HXF_FIXED_TO_COLORBYTE is defined outside this view;
;   presumably it converts the fixed-point value in its first register to a
;   0..255 byte in place, using the second register as scratch - TODO confirm.
;   The ORs below only produce a clean ARGB word if each result fits in 8 bits.
; ---------------------------------------------------------------------- --
|HXFCopyFIXEDColorProc| PROC
	; Load the four colour channels (alpha first; it is the first one used below)
	ldr r5, [r6, #12] ; load a
	ldr r8, [r6]	 ; load r
	ldr r3, [r6, #4] ; load g
	ldr r4, [r6, #8] ; load b
	
	HXF_FIXED_TO_COLORBYTE r5, r7	; A	
	mov r10, r5, lsl #24		; r10 = A << 24 (mov, so the old r10 contents are discarded)

	HXF_FIXED_TO_COLORBYTE r8, r7	; R
	orr r10, r10, r8, lsl #16	; r10 |= R << 16

	HXF_FIXED_TO_COLORBYTE r3, r7	; G
	orr r10, r10, r3, lsl #8	; r10 |= G << 8

	HXF_FIXED_TO_COLORBYTE r4, r7 	; B
	orr r10, r10, r4		; r10 |= B

	str r10, [r2]	; Output ARGB
	mov pc, lr
	ENDP

;** ************************************************************************ **
;**	Fogging Support
;** ************************************************************************ **
; ---------------------------------------------------------------------- --
; Register Map - Fog Procs
; ---------------------------------------------------------------------- --
; r0 =	pState      r4 =            r8 =            r12 = pOutVtx        
; r1 =  flags       r5 =            r9 =            r13 = sp
; r2 = Spec Off     r6 =            r10 =           r14 = lr (return)
; r3 =              r7 =            r11 = 		    r15 = pc
; ---------------------------------------------------------------------- --
; wr0 =             wr4 =            wr8 =            wr12 = 0xFFFF0000         
; wr1 =             wr5 =            wr9 =            wr13 =   
; wr2 =             wr6 =            wr10 =           wr14 = Zero 
; wr3 =             wr7 =            wr11 = 0x04040404wr15 =   
; wcgr0 = 16        wcgr1 = 12        wcgr2 = 8        wcgr3 = 32
; ---------------------------------------------------------------------- --
;** ************************************************************************ **
|HXFFogExpProc| PROC
	ldrd 	r4, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_X] ; Load in X,Y Position
	ldrd 	r8, [r0, #HXFSTATE_OFFSET_FOG_XFORM_X] 			; Load the Fog X, Y  Transform

	ldrd 	r6, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_Z] ; Load in Z, W Position
	ldrd 	r10, [r0, #HXFSTATE_OFFSET_FOG_XFORM_Z] 			; Load the Fog Z, W  Transform

	wzero wr0
	tmia wr0, r4, r8
	tmia wr0, r5, r9
	tmia wr0, r6, r10
	tmia wr0, r7, r11

	ldr 	r5, [r0, #HXFSTATE_OFFSET_FOG_DENSITY] ; Load the Fog End, Fog Inverse Range

	wsradg wr0, wr0, wcgr0
	textrmsw r3, wr0, #0
	
	; fFog = GLES_POWF(GLES_E, -fDensity * zPos);
	
	; abs(ed) 	
	cmp 	r3, #0
	rsblt	r3, r3, #0 

	; fp = Density * (zPos)
	smull   r4, r5, r3, r5
	mov		r5, r5, lsl #16
	orr		r3, r5, r4, lsr #16				;Concatenating Lo and Hi
	
	rsb r3, r3, #0 ; -(fp)

	; Clamp to Max
	cmp r3, #0
	movge r3, #0xFF;
	bge HTV_FOG_EXP_STORE	

	; r0 * Scale Factor(0xFFFFD1D8) -0.180310 = 1.0f/-5.546000f
	mov r4, #0xFF000000
	orr r4, r4, #0x00FF0000
	orr r4, r4, #0x0000D100
	orr r4, r4, #0x000000D8

	smull r5, r6, r3, r4 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -