; File: hxftransformarm.s
; normalize HSpace values
wsradg wr0, wr0, wcgr0
wsradg wr1, wr1, wcgr0
wsradg wr2, wr2, wcgr0
wsradg wr3, wr3, wcgr0
; ---------------------------------------------------------------------- --
; Register Map - Store Clip positions
; ---------------------------------------------------------------------- --
; r0 = pState r4 = r8 = r.w r12 = pOutVtx
; r1 = flags r5 = r.x r9 = r13 = sp
; r2 = r6 = r.y r10 = r14 =
; r3 = r7 = r.z r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = v.w wr4 = wr8 = wr12 = 0xFFFF0000
; wr1 = v.z wr5 = wr9 = wr13 = zExt
; wr2 = v.y wr6 = wr10 = wr14 = Zero
; wr3 = v.x wr7 = wr11 = 0x04040404wr15 =
; wcgr0 = 16 wcgr1 = 12 wcgr2 = 8 wcgr3 = 32
; ---------------------------------------------------------------------- --
; Take all high word results and place them in ARM registers. TMRRC <Rlo, Rhi, wRn>.
ldr r2, [r0, #HXFSTATE_OFFSET_OUTCLIPPOSOFFSET] ;
; extract values for multiply
textrmsw r5, wr0, #0 ; r.x
textrmsw r6, wr1, #0 ; r.y
textrmsw r7, wr2, #0 ; r.z
textrmsw r8, wr3, #0 ; r.w
; we must store the clip space vertex data for clipper.
add r2, r12, r2 ; calculate the output offset
; Store the H-Space Coordinates for Clipping
str r5, [r2]
str r6, [r2, #4]
str r7, [r2, #8]
str r8, [r2, #12]
; ---------------------------------------------------------------------- --
; Generate ClipFlags
; ClipFlags is used to determine if a supplied point lies inside
; the clip box extents
mov r10, #0 ; clear the r10 register to hold the clip flags
; load the clip-flag output pointer so the flags can be stored below
ldr r9, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]
; compute x flags
; x<0
cmp r5, #0
orrlt r10, r10, #HXF_VTX_CLIP_FLAG_NEG_X
; w-x<0
cmp r5, r8
orrgt r10, r10, #HXF_VTX_CLIP_FLAG_POS_X
; compute y flags
; y<0
cmp r6, #0
orrlt r10, r10, #HXF_VTX_CLIP_FLAG_NEG_Y
; w-y<0
cmp r6, r8
orrgt r10, r10, #HXF_VTX_CLIP_FLAG_POS_Y
; compute z flags
; z<0
cmp r7, #0
orrlt r10, r10, #HXF_VTX_CLIP_FLAG_NEG_Z
; w-z<0
cmp r7, r8
orrgt r10, r10, #HXF_VTX_CLIP_FLAG_POS_Z
; store the clip flag
strb r10, [r9] ; store clipFlags
; prefetch 4 vertices ahead
pld [r9, #HXF_TNL_VTX_PREFETCH_DISTANCE<<1]
; update the clipflag pointer
add r9, r9, #1
str r9, [r0, #HXFSTATE_OFFSET_POUTCLIPFLAGS]
; If the vertex violates any clip boundary, do no further processing.
cmp r10, #0
bne HTV_TNL_RETURN
; ---------------------------------------------------------------------- --
; Register Map - Divide by W
; ---------------------------------------------------------------------- --
; r0 = pState r4 = r8 = r.w r12 = pOutVtx
; r1 = flags r5 = r.x r9 = r13 = sp
; r2 = r6 = r.y r10 = r14 = W Exp
; r3 = r7 = r.z r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = v.w wr4 = wr8 = wr12 = 0xFFFF0000
; wr1 = v.z wr5 = wr9 = wr13 = zExt
; wr2 = v.y wr6 = wr10 = wr14 = Zero
; wr3 = v.x wr7 = wr11 = 0x04040404wr15 =
; wcgr0 = 16 wcgr1 = 12 wcgr2 = 8 wcgr3 = 32
; ---------------------------------------------------------------------- --
W_DIVIDE ; registers r9-r11, r14 free
; Calculate inverse W -- r8 = w -> r8 = 1/w
cmp r8, #HFX_ONE;
beq VIEWPORT_TRANSFORM
; Check for division by 0.
cmp r8, #0;
beq HTV_TNL_RETURN
; Normalize w
movge r10, r8
rsblt r10, r8, #0
clz r14, r10
mov r11, r10, lsl r14 ; setup denominator for the divide
; Clear out the result registers so we can mult & acc 32-bit values into them.
mov r10, #0 ; Clear the result register
mov r9, #0x80000000 ; set up for the divide
; Do the division computation
HXF_ONEBITDIVIDE 31, r9, r11, r10 ; 1
HXF_ONEBITDIVIDE 30, r9, r11, r10 ; 2
HXF_ONEBITDIVIDE 29, r9, r11, r10 ; 3
HXF_ONEBITDIVIDE 28, r9, r11, r10 ; 4
HXF_ONEBITDIVIDE 27, r9, r11, r10 ; 5
HXF_ONEBITDIVIDE 26, r9, r11, r10 ; 6
HXF_ONEBITDIVIDE 25, r9, r11, r10 ; 7
HXF_ONEBITDIVIDE 24, r9, r11, r10 ; 8
HXF_ONEBITDIVIDE 23, r9, r11, r10 ; 9
HXF_ONEBITDIVIDE 22, r9, r11, r10 ; 10
HXF_ONEBITDIVIDE 21, r9, r11, r10 ; 11
HXF_ONEBITDIVIDE 20, r9, r11, r10 ; 12
HXF_ONEBITDIVIDE 19, r9, r11, r10 ; 13
HXF_ONEBITDIVIDE 18, r9, r11, r10 ; 14
HXF_ONEBITDIVIDE 17, r9, r11, r10 ; 15
HXF_ONEBITDIVIDE 16, r9, r11, r10 ; 16
; convert back to Fixed point -- lz in r14
; ResultExp = 0 - 15 - lz(w)
; Result Shift to FX = 7 - ResultExp
; Result Shift to FX = 7 - 0 - 15 - lz(w)
; Result Shift to FX = 22 -lz(w)
rsb r14, r14, #22
mov r8, r10, lsr r14
; Restore the proper sign to the w result.
cmp r8, #0
rsblt r10, r10, #0;
mov r10, #24
tinsrw wr15, r10, #0 ; Setup for the Shift convert 24.40 to 16.16
; Multiply the vector components by the inverse w.
wzero wr0
tmia wr0, r5, r8 ; r.x * 1/w
wzero wr1
tmia wr1, r6, r8 ; r.y * 1/w
wzero wr2
tmia wr2, r7, r8 ; r.z * 1/w
wsrad wr0, wr0, wr15
wsrad wr1, wr1, wr15
wsrad wr2, wr2, wr15
mov r8, r8, lsr #8 ; Convert 1/w from 8.24 to 16.16
; Pack the results into two registers use signed saturation to clamp large
; numbers.
wpackdss wr0, wr0, wr1
wpackdss wr2, wr2, wr14
; Move the results back into the appropriate ARM destination registers.
tmrrc r5, r6, wr0
textrmsw r7, wr2, #0
; ---------------------------------------------------------------------- --
; Register Map - Viewport Transform
; ---------------------------------------------------------------------- --
; r0 = pState r4 = r8 = w r12 = pOutVtx
; r1 = flags r5 = r9 = r13 = sp
; r2 = r6 = r10 = r14 = wExp
; r3 = r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = v.x wr4 = wr8 = wr12 = 0xFFFF0000
; wr1 = v.y wr5 = wr9 = wr13 = zExt
; wr2 = v.z wr6 = wr10 = wr14 = Zero
; wr3 = v.w wr7 = wr11 = 0x04040404wr15 =
; wcgr0 = 16 wcgr1 = 12 wcgr2 = 8 wcgr3 = 32
; ---------------------------------------------------------------------- --
VIEWPORT_TRANSFORM
; Viewport transform
ldrd r2, [r0, #HXFSTATE_OFFSET_VIEWPORT_XS] ; Load VPXScale & VPYScale
wldrw wr0, [r0, #HXFSTATE_OFFSET_VIEWPORT_XT] ; Load VPXTrans
wldrw wr1, [r0, #HXFSTATE_OFFSET_VIEWPORT_YT] ; Load VPYTrans
wldrw wr2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZT] ; Load VPZTrans
wslldg wr0, wr0, wcgr0
tmia wr0, r2, r5
wslldg wr1, wr1, wcgr0
ldr r2, [r0, #HXFSTATE_OFFSET_VIEWPORT_ZS] ; Load VP ZScale
tmia wr1, r3, r6
wslldg wr2, wr2, wcgr0
tmia wr2, r2, r7
wsradg wr0, wr0, wcgr0
wsradg wr1, wr1, wcgr0
wsradg wr2, wr2, wcgr0
; Store the output vertices - Only in Ortho case do we need to store verts
wstrw wr0, [r12]
wstrw wr1, [r12, #4]
wstrw wr2, [r12, #8]
str r8, [r12, #12]
b HTV_TNL_RETURN
ENDP
;** ************************************************************************ **
;** Copy Colors Procs
;** ************************************************************************ **
; ------------------------------------------------------------------------
; Register Map - Copy Diffuse Procs
; ------------------------------------------------------------------------
; r0  = pState     r4  =            r8  =               r12 = pOutVtx
; r1  = flags      r5  =            r9  =               r13 = sp
; r2  = pOutDif    r6  =            r10 =               r14 =
; r3  =            r7  =            r11 =               r15 = pc
; ------------------------------------------------------------------------
; wr0 =            wr4 =            wr8  =              wr12 = 0xFFFF0000
; wr1 =            wr5 =            wr9  =              wr13 =
; wr2 =            wr6 =            wr10 =              wr14 = Zero
; wr3 =            wr7 =            wr11 = 0x04040404   wr15 =
; wcgr0 = 16       wcgr1 = 12       wcgr2 = 8           wcgr3 = 32
; ------------------------------------------------------------------------
;** ************************************************************************ **
|HXFCopyPACKEDColorProc| PROC
        ; Narrow a 4 x 16-bit color to 4 x 8-bit and write it to the output.
        ;
        ;   In:   r6    = address of the source color (64 bits are read)
        ;         r2    = address of the output color word
        ;         wr14  = zero, wcgr2 = 8 (see the register map for these procs)
        ;   Out:  one 32-bit packed color stored at [r2]
        ;   Uses: wr15
        ;
        ; 16-bit colors only ever come from HColor4S structures, i.e. the
        ; default colors that were set into the material.
        ;
        ; NOTE(review): no channel shuffle is applied here, unlike
        ; HXFCopyUINT8ColorProc - presumably the source is already stored in
        ; the output channel order; confirm against HColor4S.
        wldrd       wr15, [r6]              ; fetch all four 16-bit channels
        wsrlhg      wr15, wr15, wcgr2       ; per-halfword >> wcgr2: 0.16 -> 8.8
        wpackhus    wr15, wr15, wr14        ; halfwords -> unsigned-saturated bytes
        wstrw       wr15, [r2]              ; low word now holds the 4 color bytes
        mov         pc, lr                  ; return
        ENDP
|HXFCopyUINT8ColorProc| PROC
        ; Copy a 4 x 8-bit color (HXF_VF_UINT8) to the output, exchanging the
        ; channels held in lanes 0 and 2 on the way.
        ;
        ;   In:   r6    = address of the source color (32 bits are read)
        ;         r2    = address of the output color word
        ;         wr14  = zero (see the register map for these procs)
        ;   Out:  one 32-bit packed color stored at [r2]
        ;   Uses: wr15
        ;
        ; Why the shuffle: in little-endian mode an RGBA byte sequence shows
        ; up in the register as ABGR after the word load, while the
        ; destination wants ARGB in the register.  Selector 0xC6 is
        ; (11 00 01 10): lanes 3 and 1 stay put, lanes 2 and 0 trade places,
        ; turning ABGR (11 10 01 00) into ARGB.
        wldrw       wr15, [r6]              ; four source bytes
        wunpckelub  wr15, wr15              ; widen low bytes to halfwords (unsigned)
        wshufh      wr15, wr15, #0xC6       ; swap halfword lanes 0 and 2
        wpackhus    wr15, wr15, wr14        ; halfwords -> unsigned-saturated bytes
        wstrw       wr15, [r2]              ; store the packed result
        mov         pc, lr                  ; return
        ENDP
|HXFCopyFIXEDColorProc| PROC
        ; Build one packed ARGB word from four 32-bit color channels.
        ;
        ;   In:   r6 = address of the source color; words at offsets
        ;              0, 4, 8, 12 are r, g, b, a respectively
        ;         r2 = address of the output color word
        ;   Out:  (A << 24) | (R << 16) | (G << 8) | B stored at [r2]
        ;   Uses: r3, r4, r5, r8, r10 directly; r7 is handed to
        ;         HXF_FIXED_TO_COLORBYTE as its second operand
        ;
        ; HXF_FIXED_TO_COLORBYTE is defined outside this listing.  The code
        ; below relies on it leaving the channel's byte value in its first
        ; operand; r7 is presumably scratch for the macro (TODO confirm).

        ; Fetch the four channels; alpha first because it is consumed first.
        ldr         r5, [r6, #12]           ; a
        ldr         r8, [r6]                ; r
        ldr         r3, [r6, #4]            ; g
        ldr         r4, [r6, #8]            ; b

        ; Convert each channel and merge it into r10, top byte downwards.
        HXF_FIXED_TO_COLORBYTE r5, r7       ; A
        mov         r10, r5, lsl #24        ; r10  = A << 24
        HXF_FIXED_TO_COLORBYTE r8, r7       ; R
        orr         r10, r10, r8, lsl #16   ; r10 |= R << 16
        HXF_FIXED_TO_COLORBYTE r3, r7       ; G
        orr         r10, r10, r3, lsl #8    ; r10 |= G << 8
        HXF_FIXED_TO_COLORBYTE r4, r7       ; B
        orr         r10, r10, r4            ; r10 |= B

        str         r10, [r2]               ; output ARGB
        mov         pc, lr                  ; return
        ENDP
;** ************************************************************************ **
;** Fogging Support
;** ************************************************************************ **
; ------------------------------------------------------------------------
; Register Map - Fog Procs
; ------------------------------------------------------------------------
; r0  = pState     r4  =            r8  =               r12 = pOutVtx
; r1  = flags      r5  =            r9  =               r13 = sp
; r2  = Spec Off   r6  =            r10 =               r14 = lr (return)
; r3  =            r7  =            r11 =               r15 = pc
; ------------------------------------------------------------------------
; wr0 =            wr4 =            wr8  =              wr12 = 0xFFFF0000
; wr1 =            wr5 =            wr9  =              wr13 =
; wr2 =            wr6 =            wr10 =              wr14 = Zero
; wr3 =            wr7 =            wr11 = 0x04040404   wr15 =
; wcgr0 = 16       wcgr1 = 12       wcgr2 = 8           wcgr3 = 32
; ------------------------------------------------------------------------
;** ************************************************************************ **
|HXFFogExpProc| PROC
ldrd r4, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_X] ; Load in X,Y Position
ldrd r8, [r0, #HXFSTATE_OFFSET_FOG_XFORM_X] ; Load the Fog X, Y Transform
ldrd r6, [r0, #HXFSTATE_OFFSET_STORAGE_VTXPOSITION_Z] ; Load in Z, W Position
ldrd r10, [r0, #HXFSTATE_OFFSET_FOG_XFORM_Z] ; Load the Fog Z, W Transform
wzero wr0
tmia wr0, r4, r8
tmia wr0, r5, r9
tmia wr0, r6, r10
tmia wr0, r7, r11
ldr r5, [r0, #HXFSTATE_OFFSET_FOG_DENSITY] ; Load the Fog End, Fog Inverse Range
wsradg wr0, wr0, wcgr0
textrmsw r3, wr0, #0
; fFog = GLES_POWF(GLES_E, -fDensity * zPos);
; abs(ed)
cmp r3, #0
rsblt r3, r3, #0
; fp = Density * (zPos)
smull r4, r5, r3, r5
mov r5, r5, lsl #16
orr r3, r5, r4, lsr #16 ;Concatenating Lo and Hi
rsb r3, r3, #0 ; -(fp)
; Clamp to Max
cmp r3, #0
movge r3, #0xFF;
bge HTV_FOG_EXP_STORE
; r0 * Scale Factor(0xFFFFD1D8) -0.180310 = 1.0f/-5.546000f
mov r4, #0xFF000000
orr r4, r4, #0x00FF0000
orr r4, r4, #0x0000D100
orr r4, r4, #0x000000D8
smull r5, r6, r3, r4
; (listing truncated here by the code viewer; HXFFogExpProc continues in the original file)