📄 hxfcliparm.s
字号:
;/* ************************************************************************ *\
;** INTEL Corporation Proprietary Information
;**
;** This listing is supplied under the terms of a license
;** agreement with INTEL Corporation and may not be copied
;** nor disclosed except in accordance with the terms of
;** that agreement.
;**
;** Copyright (c) 2003 Intel Corporation.
;** All Rights Reserved.
;**
;** ************************************************************************ **
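;** FILE: HXFClipARM.s
;** DESCRIPTION: ARM assembly (using wMMX instructions) routine that clips a
;** line or triangle segment against a specified plane.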
;**
;** AUTHOR: Chau Doan
;** CREATED: July 31, 2003
;**
; * $Date: 3/25/04 1:27p $ $Revision: 9 $
; * $Log: /Intel_Development/Drivers/Marathon/WinCE42/opengles/HXFClipARM.s $
; *
; * 9 3/25/04 1:27p Clmontgo
; * Optimization of Clip flag generation and VP XForm. Fix for Clipping
; * issue observed in previous version.
; * Revision 1.7 2004/03/22 11:43:39 bcb
; * New Intel code drop. 22/03/04
; *
; *8 12/31/03 2:07p Clmontgo
; *Fixes for screen rotation and Near Clip plane problem
;
; 7 12/21/03 12:53p Clmontgo
;
; 6 12/17/03 9:20a Clmontgo
; Added Version ID and log to file headers
;
; 5 12/17/03 8:42a Clmontgo
;\* ************************************************************************ */
INCLUDE HXFState.inc ; Definitions of the HXFState Structure
;** ************************************************************************ **
;** CONSTANTS (none declared in this section)
;** ************************************************************************ **
;** ************************************************************************ **
;** IMPORTS (none declared in this section)
;** ************************************************************************ **
;** ************************************************************************ **
;** EXPORTS
;** ************************************************************************ **
EXPORT HXFClipInterpolateXXX ; export the PROC label defined in the HXFCLIP area below
;** ************************************************************************ **
;** VARIABLES (none declared in this section; the AREA below opens the code)
;** ************************************************************************ **
AREA HXFCLIP, CODE, READONLY ; read-only code area named HXFCLIP
;** ************************************************************************ **
; Name: HXFClipInterpolate (exported below under the label HXFClipInterpolateXXX)
; Description: Clips a line (a line or triangle segment) against a specified
; plane. We use H-space coordinates to ensure that we get proper
; interpolation values. The clip-space coordinates are still stored in
; mantissa/exponent form.
;
; 1) Load component of interest [x,y,z] for in and out
;
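; 2) alpha - interpolation distance from in towards out:
; alpha = (vp[boundary] - in.v[idx]) / (out.v[idx] - in.v[idx])
; Compute the delta of the component of interest (out.v[idx] - in.v[idx]).
; Sign correct: a vertex's component is negated when its w is negative,
; and both differences are made positive before the divide.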
; 3) Compute new ClipPos
; new.clip = in.clip + alpha * (out.clip - in.clip)
; 4) compute New Positions
; new.pos = new.clip/new.clip.w
; 5) compute New Diffuse
; new.dif = in.dif + alpha * (out.dif - in.dif)
; 6) compute New Specular
; new.spec = in.spec + alpha * (out.spec - in.spec)
; 7) compute New Tex1
; new.Tex1 = in.Tex1 + alpha * (out.Tex1 - in.Tex1)
; 8) compute New Tex2
; new.Tex2 = in.Tex2 + alpha * (out.Tex2 - in.Tex2)
;
; Register Map: Standard calling convention.
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = r8 = r12 =
; r1 = pInPos r5 = r9 = r13 = sp (boundary*)
; r2 = pOutPos r6 = r10 = r14 =
; r3 = pNewSeg r7 = r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = inPos.x wr4 = OutPos.y wr8 = wr12 =
; wr1 = inPos.y wr5 = OutPos.x wr9 = wr13 = VP[boundary]
; wr2 = inPos.z wr6 = OutPos.z wr10 = wr14 = inPos[idx]
; wr3 = inPos.w wr7 = wr11 = wr15 = outPos.idx
; wcgr0 = wcgr1 = wcgr2 = wcgr3 =
; ---------------------------------------------------------------------- --
;
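; Prototype in C: void HXFClipInterpolate(HXFState* pState, void* pIn, void* pOut,
; HUINT32 boundary, HXFClipSegment* pNewNode);
; NOTE(review): the code below reads boundary from [sp] on entry and the register
; map lists r3 = pNewSeg, i.e. it expects (..., pNewNode, boundary) - confirm
; which argument order is correct.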
;** ************************************************************************ **
|HXFClipInterpolateXXX| PROC
ldr r12, [sp] ; retrive boundary off the stack
stmfd sp!, { r4-r11, lr }
; We only really need 1 coordinate to generate alpha.
; r10 = start of Clip Position + (boundary>>1)
; ldr r10, [r0, #HXFSTATE_OFFSET_OUTCLIPPOSOFFSET]
;
; mov r9, r12 lsr #1 ; index = boundary >> 1
;
;
; add r10, r10, r9, lsl #2 ; offset
;
ldr r9, [r1, #12] ; in.w
ldr r8, [r2, #12] ; out.w
mov r10, r12 lsr #1 ; index = boundary >> 1
mov r10, r10 lsl #2
ldr r4, [r1, r10] ; inPos.v[index]
ldr r5, [r2, r10] ; outPos.v[index]
; grab Viewport
mov r11, r12 lsl #2 ; word size
; add r11, r11, #HXFSTATE_OFFSET_VIEWPORT_X1
ldr r6, [r0, r11]
cmp r9, #0 ; is w negative?
rsblt r4, r4, #0
cmp r8, #0 ; is w negative?
rsblt r5, r5, #0
; load partial "in" data way ahead...
ldr r9, [r1, #12] ; in.w
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = inPos[idx] r8 = out.w r12 = boundary
; r1 = pInPos r5 = OutPos[idx] r9 = in.w r13 = sp
; r2 = pOutPos r6 = VP[boundary] r10 = index*4 r14 =
; r3 = pNewSeg r7 = r11 = boundary*4 r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = inPos.x wr4 = OutPos.y wr8 = wr12 =
; wr1 = inPos.y wr5 = OutPos.x wr9 = wr13 = VP[boundary]
; wr2 = inPos.z wr6 = OutPos.z wr10 = wr14 = inPos[idx]
; wr3 = inPos.w wr7 = wr11 = wr15 = outPos.idx
; wcgr0 = wcgr1 = wcgr2 = wcgr3 =
; ---------------------------------------------------------------------- --
; Work to do before divide:
; temp.v = out.v - in.v
; BC0 = Viewport[boundary] - inPos.v[index];
; BC1 = outPos.v[index] - inPos.v[index];
; alpha = BC0/BC1
; BC1
subs r5, r5, r4
mov r7, r5 ; Since BC1 = out - in, we can reuse it later
rsblt r5, r5, #0 ; Ensure that BC1 is positive
; BC0
subs r6, r6, r4
rsblt r6, r6, #0 ; Ensure that BC0 is positive
; ---------------------------------------------------------------------- --
; Register Map -
; ---------------------------------------------------------------------- --
; r0 = pState r4 = inPos[idx] r8 = r12 = boundary
; r1 = pInPos r5 = BC1 r9 = r13 = sp
; r2 = pOutPos r6 = BC0 r10 = index r14 =
; r3 = pNewSeg r7 = out.v-in.v r11 = r15 = pc
; ---------------------------------------------------------------------- --
; wr0 = inPos.x wr4 = OutPos.y wr8 = wr12 =
; wr1 = inPos.y wr5 = OutPos.x wr9 = wr13 = VP[boundary]
; wr2 = inPos.z wr6 = OutPos.z wr10 = wr14 = inPos[idx]
; wr3 = inPos.w wr7 = wr11 = wr15 = outPos.idx
; wcgr0 = wcgr1 = wcgr2 = wcgr3 =
; ---------------------------------------------------------------------- --
; shift r11 << 16 for 0.32 optimization when done!
clz r10, r6 ; nlz
clz r11, r5 ; dlz
; shift to full mantissa
mov r6, r6 lsl r10
mov r5, r5 lsl r11
; FIXME: Do we really need all 16 bits?
; Do the division computation
mov r8, #0
HXF_ONEBITDIVIDE 31, r6, r5, r8
HXF_ONEBITDIVIDE 30, r6, r5, r8
HXF_ONEBITDIVIDE 29, r6, r5, r8
HXF_ONEBITDIVIDE 28, r6, r5, r8
HXF_ONEBITDIVIDE 27, r6, r5, r8
HXF_ONEBITDIVIDE 26, r6, r5, r8
HXF_ONEBITDIVIDE 25, r6, r5, r8
HXF_ONEBITDIVIDE 24, r6, r5, r8
HXF_ONEBITDIVIDE 23, r6, r5, r8
HXF_ONEBITDIVIDE 22, r6, r5, r8
HXF_ONEBITDIVIDE 21, r6, r5, r8
HXF_ONEBITDIVIDE 20, r6, r5, r8
HXF_ONEBITDIVIDE 19, r6, r5, r8
HXF_ONEBITDIVIDE 18, r6, r5, r8
HXF_ONEBITDIVIDE 17, r6, r5, r8
HXF_ONEBITDIVIDE 16, r6, r5, r8
; restore correct "pointedness"
subs r5, r10, r11
ldr r11, [r2, #12] ; out.w preload
addlt r5, r5, #-7
addge r5, r5, #7
movlt r8, r8, lsl r5
movge r8, r8, lsr r5
; Now, do a scale/add from inPos to outPos
smull r6, r5, r8, r7 ; want a 16.48 result, with 16.16 in r5
ldr r7, [r1, #8] ; in.z preload
; get 16.16 result from 32.32 between r5 & r6
mov r5, r5, lsl #16
orr r5, r5, r6 lsr #16
ldr r6, [r1, #4] ; in.y preload
; add delta to in.v
add r5, r5, r4
ldr r4, [r1, #0] ; in.x preload
; Lastly, do the clamp for even/odd boundaries
;FIXME: clamp gone awry (the clamp code below is commented out)
; movs r12, r12 lsr #1
; mov r10, #-1
; andcs r5, r5, r10 lsl #16
; bcs CLAMPED
;
; cmp r5, #0
; rsblt r5, r5,#0
; and r5, r5, r10 lsl #16
;
;CLAMPED
; in wMMX, do an out = out - in, scale, and add.
cmp r9, #0 ; is in.w negative? (from way above!)
rsblt r4, r4, #0
rsblt r6, r6, #0
rsblt r7, r7, #0
rsblt r9, r9, #0
; now, repeat for outPos!
ldr r10, [r2, #0] ; load out.x
cmp r11, #0 ; is out.w < 0?
rsblt r11, r11, #0
tinsrw wr0, r4, #0 ; insert "in" into wMMX unpacked!
tinsrw wr1, r6, #0
tinsrw wr2, r7, #0
tinsrw wr3, r9, #0
rsblt r10, r10, #0
sub r9, r11, r9 ; out.w - in.w
sub r4, r10, r4 ; out.x - in.x
ldr r10, [r2, #4]
ldr r11, [r2, #8]
wzero wr4
wzero wr5
wzero wr6
wzero wr7
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -