📄 xplarm.s
字号:
wzero wr2
ldr r12, [r1] ; Load v1.x
ldr r4, [r2] ; Load M_11
ldr r5, [r2, #4] ; Load M_12
ldr r3, [r2, #8] ; Load M_13
tmia wr0, r12, r4 ; r.x += v1.x * M_11
tmia wr1, r12, r5 ; r.y += v1.x * M_12
tmia wr2, r12, r3 ; r.z += v1.x * M_13
ldr r12, [r1, #4] ; Load v1.y
ldr r4, [r2, #16] ; Load M_21
ldr r5, [r2, #20] ; Load M_22
ldr r3, [r2, #24] ; Load M_23
tmia wr0, r12, r4 ; r.x += v1.y * M_21
tmia wr1, r12, r5 ; r.y += v1.y * M_22
tmia wr2, r12, r3 ; r.z += v1.y * M_23
ldr r12, [r1, #8] ; Load v1.z
ldr r4, [r2, #32] ; Load M_31
ldr r5, [r2, #36] ; Load M_32
ldr r3, [r2, #40] ; Load M_33
tmia wr0, r12, r4 ; r.x += v1.z * M_31
tmia wr1, r12, r5 ; r.y += v1.z * M_32
tmia wr2, r12, r3 ; r.z += v1.z * M_33
mov r12, #HFX_ONE
ldr r4, [r2, #48] ; Load M_41
ldr r5, [r2, #52] ; Load M_42
ldr r3, [r2, #56] ; Load M_43
tmia wr0, r12, r4 ; r.x += M_41
tmia wr1, r12, r5 ; r.y += M_42
tmia wr2, r12, r3 ; r.z += M_43
wsrldg wr0, wr0, wcgr0 ; r.x >> 16
wsrldg wr1, wr1, wcgr0 ; r.y >> 16
wsrldg wr2, wr2, wcgr0 ; r.z >> 16
wstrw wr0, [r0]
wstrw wr1, [r0, #4]
wstrw wr2, [r0, #8]
ldmfd sp!, {r4-r5}
mov pc, lr
ENDP
;/* ************************************************************************* *\
; FUNCTION: HVec4FX_Transform
; DESCRIPTION:
; []
;
;\* ************************************************************************* */
;HVECTOR4F* HVec4FX_Transform(HVECTOR4F* in_pDst, const HVECTOR4F* in_pB,
; const HMATRIX4F* in_pMat)
;{
; in_pDst->x = (in_pMat->_11 * in_pB->x) + (in_pMat->_21 * in_pB->y)
; + (in_pMat->_31 * in_pB->z) + (in_pMat->_41 * in_pB->w);
; in_pDst->y = (in_pMat->_12 * in_pB->x) + (in_pMat->_22 * in_pB->y)
; + (in_pMat->_32 * in_pB->z) + (in_pMat->_42 * in_pB->w);
; in_pDst->z = (in_pMat->_13 * in_pB->x) + (in_pMat->_23 * in_pB->y)
; + (in_pMat->_33 * in_pB->z) + (in_pMat->_43 * in_pB->w);
; in_pDst->w = (in_pMat->_14 * in_pB->x) + (in_pMat->_24 * in_pB->y)
; + (in_pMat->_34 * in_pB->z) + (in_pMat->_44 * in_pB->w);
;
; return in_pDst;
;}
|Vector4x_Transform| PROC
        ; ------------------------------------------------------------------
        ; In:   r0 = in_pDst  (4 result words are written here)
        ;       r1 = in_pB    (4 input words: x, y, z, w)
        ;       r2 = in_pMat  (16 words, row stride 16 bytes)
        ; Out:  r0 is never written, so it still holds in_pDst on return
        ; Uses: r3-r6, r12, wr0-wr3, wcgr0 (r4-r6 are saved and restored)
        ;
        ; Each result component is accumulated as a 64-bit sum of products
        ; in wr0..wr3, shifted right by 16, and its low word is stored.
        ; ------------------------------------------------------------------
        stmfd   sp!, {r4-r6}

        ; Clear the four 64-bit accumulators.
        wzero   wr0
        wzero   wr1
        wzero   wr2
        wzero   wr3

        ; wcgr0 = 16: shift amount applied to every accumulator at the end.
        ; This must happen before r3 is reused for matrix elements below.
        mov     r3, #16
        tmcr    wcgr0, r3

        ; ---- row 1, scaled by v.x ----------------------------------------
        ldr     r12, [r1]               ; r12 = v.x
        ldr     r4, [r2]                ; M_11
        ldr     r5, [r2, #4]            ; M_12
        ldr     r3, [r2, #8]            ; M_13
        ldr     r6, [r2, #12]           ; M_14
        tmia    wr0, r12, r4            ; r.x += v.x * M_11
        tmia    wr1, r12, r5            ; r.y += v.x * M_12
        tmia    wr2, r12, r3            ; r.z += v.x * M_13
        tmia    wr3, r12, r6            ; r.w += v.x * M_14

        ; ---- row 2, scaled by v.y ----------------------------------------
        ldr     r12, [r1, #4]           ; r12 = v.y
        ldr     r4, [r2, #16]           ; M_21
        ldr     r5, [r2, #20]           ; M_22
        ldr     r3, [r2, #24]           ; M_23
        ldr     r6, [r2, #28]           ; M_24
        tmia    wr0, r12, r4            ; r.x += v.y * M_21
        tmia    wr1, r12, r5            ; r.y += v.y * M_22
        tmia    wr2, r12, r3            ; r.z += v.y * M_23
        tmia    wr3, r12, r6            ; r.w += v.y * M_24

        ; ---- row 3, scaled by v.z ----------------------------------------
        ldr     r12, [r1, #8]           ; r12 = v.z
        ldr     r4, [r2, #32]           ; M_31
        ldr     r5, [r2, #36]           ; M_32
        ldr     r3, [r2, #40]           ; M_33
        ldr     r6, [r2, #44]           ; M_34
        tmia    wr0, r12, r4            ; r.x += v.z * M_31
        tmia    wr1, r12, r5            ; r.y += v.z * M_32
        tmia    wr2, r12, r3            ; r.z += v.z * M_33
        tmia    wr3, r12, r6            ; r.w += v.z * M_34

        ; ---- row 4, scaled by v.w ----------------------------------------
        ldr     r12, [r1, #12]          ; r12 = v.w
        ldr     r4, [r2, #48]           ; M_41
        ldr     r5, [r2, #52]           ; M_42
        ldr     r3, [r2, #56]           ; M_43
        ldr     r6, [r2, #60]           ; M_44
        tmia    wr0, r12, r4            ; r.x += v.w * M_41
        tmia    wr1, r12, r5            ; r.y += v.w * M_42
        tmia    wr2, r12, r3            ; r.z += v.w * M_43
        tmia    wr3, r12, r6            ; r.w += v.w * M_44

        ; Drop the extra 16 fraction bits produced by the multiplies.
        wsrldg  wr0, wr0, wcgr0         ; r.x >> 16
        wsrldg  wr1, wr1, wcgr0         ; r.y >> 16
        wsrldg  wr2, wr2, wcgr0         ; r.z >> 16
        wsrldg  wr3, wr3, wcgr0         ; r.w >> 16

        ; Store the low word of each accumulator to the destination.
        wstrw   wr0, [r0]
        wstrw   wr1, [r0, #4]
        wstrw   wr2, [r0, #8]
        wstrw   wr3, [r0, #12]

        ldmfd   sp!, {r4-r6}
        mov     pc, lr
        ENDP
;/* ************************************************************************* *\;
; FUNCTION: HVec3FX_Normalize
; DESCRIPTION:
;
;\* ************************************************************************* */
;HVECTOR3FX* HVec3FX_Normalize(HVECTOR3FX*in_pDst, const HVECTOR3FX* in_pB)
;{
; HFIXED l = HXF_FX_ZERO;
;// FIXME FX OPTIMIZE
; l = HMulFX(in_pB->x, in_pB->x)+
; HMulFX(in_pB->y, in_pB->y)+
; HMulFX(in_pB->z, in_pB->z);
; if(l != HXF_FX_ONE)
; {
; l = HInvSqrtFX(l); // 1.0f/(float)sqrt(l);
; in_pDst->x = HMulFX(in_pB->x, l);
; in_pDst->y = HMulFX(in_pB->y, l);
; in_pDst->z = HMulFX(in_pB->z, l);
; }
; else
; {
; in_pDst->x = in_pB->x;
; in_pDst->y = in_pB->y;
; in_pDst->z = in_pB->z;
; }
; return in_pDst;
;}
|Vector3x_Normalize| PROC
; Scales the three-word vector at r1 by the inverse square root of its
; squared length and stores the three result words at r0.  When the squared
; length (as a 16.16 value) equals HFX_ONE or 0, the input words are copied
; to the destination unchanged.  r0 is never written, so it still holds the
; destination pointer on return.  r4-r10 are saved and restored.
stmfd sp!, {r4-r10}
mov r9, #16 ; wcgr0 = 16: shift used for 32.32 to 16.16 conversion
tmcr wcgr0, r9
mov r9, #32 ; wcgr1 = 32: shift applied to the products on the DSQR32_32 path
tmcr wcgr1, r9
; ------------------------------------------------------------------------
; Register Map (as used by the code below)
; ------------------------------------------------------------------------
; r0  = pRetVec (destination)      r6  = macro argument / constant 9
; r1  = pSrcVec (source)           r7  = DSqr mantissa, then InvD
; r2  = x                          r8  = CLZ result, then exponent
; r3  = y                          r9  = shift setup, then macro argument
; r4  = z                          r10 = macro argument
; r5  = DSqr word being tested     r11, r12 = not used in this routine
; ------------------------------------------------------------------------
; wr15  = 64-bit DSqr accumulator  wr6, wr7, wr8 = VL.x, VL.y, VL.z
; wcgr0 = 16                       wcgr1 = 32
; ------------------------------------------------------------------------
; Calculate DSqr
ldr r2, [r1] ; x
ldr r3, [r1, #4] ; y
wzero wr15
ldr r4, [r1,#8] ; z
tmia wr15, r2, r2 ; x*x
tmia wr15, r3, r3 ; + y*y
tmia wr15, r4, r4 ; + z*z
; CLZ on Dsquared
; if lz > 24, branch to DSQR16_16 (16.16 path)
; otherwise fall through to DSQR32_32 (32.32 path)
textrmsw r5, wr15, #1 ; r5 = word 1 (upper 32 bits) of the 64-bit DSqr
clz r8, r5 ; r8 = leading-zero count of that upper word
cmp r8, #24
bgt DSQR16_16
DSQR32_32
; Grab the 32.0 value to get DSqr, then get a 0.32 value for InvD.
; we have the leading zeroes, and the 32 bits from the top of the 32.32 DSqr.
; To get to Fat Float, shift the mantissa right by (8 - CLZ) bits, or left
; by (CLZ - 8) bits when (8 - CLZ) is negative.
rsbs r8, r8, #8 ; r8 = 8 - CLZ; flags select the shift direction below
movge r7, r5, asr r8 ; shift mantissa right (if exp was pos)
rsblt r7, r8, #0 ; r7 = CLZ - 8, the left-shift count
movlt r7, r5, lsl r7 ; shift mantissa left (if exp was neg)
add r8, r8, #23 ; exponent = (8 - CLZ) + 23
; r5 = DSqr Q32
; r7 = DSqr mantissa FF
; r8 = DSqr Exponent FF
; NOTE(review): HXF_INVSQRT is defined outside this view; the register roles
; before and after it are taken from the neighbouring comments - confirm.
HXF_INVSQRT r7, r8, r6, r9, r10
; r5 = DSqr Q32
; r7 = InvD mantissa FF
; r8 = InvD exponent FF
; Convert to .32
mov r6, #9
adds r8, r6, r8 ; r8 = exponent + 9; the sign picks the shift direction
; mantissa: shifted left when r8 > 0, right when r8 < 0, unchanged when r8 == 0
movgt r7, r7, lsl r8
rsblt r8, r8, #0
movlt r7, r7, lsr r8
; *************
; Compute VL
; Normalize the vector
wzero wr6
wzero wr7
wzero wr8
; Normalize VL * InvD
; *************
; Compute VL - Normalize the vector
tmia wr6, r2, r7 ; VL.x = InvD * X - r2 Free
tmia wr7, r3, r7 ; VL.y = InvD * Y - r3 Free
tmia wr8, r4, r7 ; VL.z = InvD * Z - r4, r7 Free
wsradg wr6, wr6, wcgr1 ; arithmetic shift right by 32 (wcgr1)
wsradg wr7, wr7, wcgr1
wsradg wr8, wr8, wcgr1
wstrw wr6, [r0] ; store the low word of each result
wstrw wr7, [r0, #4]
wstrw wr8, [r0, #8]
b HVEC3FX_NORMALIZE_EXIT
DSQR16_16
wsradg wr15, wr15, wcgr0 ; DSqr >> 16
textrmsw r5, wr15, #0 ; r5 = word 0 (low 32 bits) of the shifted DSqr
; r5 now holds DSqr as a 16.16 value
cmp r5, #HFX_ONE ; squared length equal to HFX_ONE: copy the input
beq HVEC3FX_NORMALIZE_COPY
cmp r5, #0 ; squared length of zero: copy the input
beq HVEC3FX_NORMALIZE_COPY
; *************
; Calculate InvD - inverse sqrt of DSqr
mov r7, r5
; NOTE(review): HXF_INVSQRTFX is defined outside this view; the code below
; uses r7 as InvD afterwards - confirm the macro's outputs and clobbers.
HXF_INVSQRTFX r7, r8, r6, r10, r9, HVec3FX_Normalize
; *************
; Compute VL
; Normalize the vector
wzero wr6
wzero wr7
wzero wr8
; Normalize VL * InvD
; *************
; Compute VL - Normalize the vector
tmia wr6, r2, r7 ; VL.x = InvD * X - r2 Free
tmia wr7, r3, r7 ; VL.y = InvD * Y - r3 Free
tmia wr8, r4, r7 ; VL.z = InvD * Z - r4, r7 Free
wsradg wr6, wr6, wcgr0 ; Convert to 32 bits 16.16
wsradg wr7, wr7, wcgr0
wsradg wr8, wr8, wcgr0
wstrw wr6, [r0] ; store the low word of each result
wstrw wr7, [r0, #4]
wstrw wr8, [r0, #8]
HVEC3FX_NORMALIZE_EXIT
; Common exit: restore the saved registers and return.
ldmfd sp!, {r4-r10}
mov pc, lr
HVEC3FX_NORMALIZE_COPY
; Copy path: write the x, y, z words loaded at entry straight to the output.
str r2, [r0]
str r3, [r0, #4]
str r4, [r0, #8]
b HVEC3FX_NORMALIZE_EXIT
ENDP
;/* ************************************************************************* *\
; FUNCTION: HXFPackNormalFX
; DESCRIPTION:
;\* ************************************************************************* */
;HXFNORMAL* HXFPackNormalFX(HXFNORMAL* in_pDst, HFIXED x, HFIXED y, HFIXED z)
;{
; HXFASSERT(in_pDst);
;
; // FIXME Normalize
;
; in_pDst->x = (x>>2);
; in_pDst->y = (y>>2);
; in_pDst->z = (z>>2);
; in_pDst->w = 0;
; return in_pDst;
;}
; output will be WZ, YX (w = 0)
|Normal_Pack3x| PROC
        ; ------------------------------------------------------------------
        ; Packs three 32-bit components into four 16-bit fields.
        ; In:   r0 = in_pDst, r1 = x, r2 = y, r3 = z
        ; Out:  [r0]     = ((y >> 2) & 0xFFFF) << 16 | ((x >> 2) & 0xFFFF)
        ;       [r0, #4] =  (z >> 2) & 0xFFFF        (upper half, w, is 0)
        ;       r0 is never written, so it still holds in_pDst on return
        ; Uses: r1-r3, r12 (no stack usage)
        ;
        ; The shifted-register operands are written with the comma before
        ; the shift (Rm, ASR #n), which is the form the operand syntax
        ; requires.
        ; ------------------------------------------------------------------
        mvn     r12, #0
        mov     r12, r12, lsr #16       ; r12 = 0x0000FFFF field mask

        and     r1, r12, r1, asr #2     ; x field = (x >> 2) & 0xFFFF
        and     r2, r12, r2, asr #2     ; y field = (y >> 2) & 0xFFFF
        and     r3, r12, r3, asr #2     ; z field = (z >> 2) & 0xFFFF

        orr     r1, r1, r2, lsl #16     ; word 0 = y field : x field
        str     r1, [r0]
        str     r3, [r0, #4]            ; word 1 = 0 : z field

        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: HFToFX
; Description:
; Input Arguments: r0 - Float value to convert
; Output Argument: r0 - Fixed point return value
; Prototype in C: HFIXED HFToFX(float);
;** ************************************************************************ **
|FtoX| PROC
; Converts the value in r0 by expanding the HXF_FToFX macro on it and
; returns with the result left in r0.
; NOTE(review): HXF_FToFX is defined outside this view.  r1 and r2 are
; presumably scratch registers and FToX a unique-name argument for the
; expansion - confirm against the macro definition.
HXF_FToFX r0, r1, r2, FToX
mov pc, lr ; return to the caller
ENDP
;** ************************************************************************ **
; Name: HFXToF
; Description: Converts a fixed point number to a float
; Input Arguments: r0 - Fixed value to convert
; Output Argument: r0 - Float point return value
; Prototype in C: float HFXToF(HFIXED);
;** ************************************************************************ **
|XtoF| PROC
; Converts the value in r0 by expanding the HXF_FXToF macro and returns
; with the result left in r0.
mov r1, r0 ; copy the input to r1 so r0 can be passed as a separate macro argument
; NOTE(review): HXF_FXToF is defined outside this view.  The argument order
; looks like (result, input, scratch, unique name) - confirm against the
; macro definition.
HXF_FXToF r0, r1, r2, XtoF
mov pc, lr ; return to the caller
ENDP
;** ************************************************************************ **
; Name: HFToFXArray
; Description:
; Input Arguments: r0 - Array to receive the converted values
; r1 - Array of values to convert
; r2 - Number of values to convert
; Output Argument:
; Prototype in C: HFIXED* HFToFXArray(HFIXED*, float*, HUHINT32);
;** ************************************************************************ **
|ArrayFtoX| PROC
        ; ------------------------------------------------------------------
        ; Converts r2 words from the array at r1 with the HXF_FToFX macro
        ; and writes them to the array at r0.
        ; In:   r0 = output array, r1 = input array, r2 = element count
        ; Out:  r0 is never written, so it still holds the output array
        ; Uses: r1-r3 (r4-r6 are saved and restored)
        ; NOTE(review): HXF_FToFX is defined outside this view; it is
        ; assumed to touch only the registers passed to it - confirm.
        ; ------------------------------------------------------------------
        cmp     r2, #0                  ; nothing to convert?
        moveq   pc, lr                  ; then return before touching the stack

        stmfd   sp!, {r4-r6}
        mov     r3, r0                  ; r3 = write cursor; r0 stays intact

HFTOFXARRAY_LOOP
        ldr     r4, [r1], #4            ; fetch next input word, advance source
        HXF_FToFX r4, r5, r6, ArrayFtoX
        str     r4, [r3], #4            ; store converted word, advance dest
        subs    r2, r2, #1              ; one fewer element remaining
        bne     HFTOFXARRAY_LOOP

        ldmfd   sp!, {r4-r6}
        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: HFXToFArray
; Description:
; Input Arguments: r0 - output array of float data
; r1 - input array of fixed data
; r2 - Number of elements to convert
; Output Argument: r0 - output array of float data
; Prototype in C: HFIXED* HFXToFArray(FLOAT*, HFIXED*, HUHINT32);
;** ************************************************************************ **
|ArrayXtoF| PROC
        ; ------------------------------------------------------------------
        ; Converts r2 words from the array at r1 with the HXF_FXToF macro
        ; and writes them to the array at r0.
        ; In:   r0 = output array, r1 = input array, r2 = element count
        ; Out:  r0 is never written, so it still holds the output array
        ; Uses: r1-r3 (r4-r7 are saved and restored)
        ;
        ; The macro is given ArrayXtoF as its fourth argument so that this
        ; expansion is named the same way as every other conversion-macro
        ; call in the file instead of relying on an empty argument.
        ; NOTE(review): HXF_FXToF is defined outside this view; it is
        ; assumed to touch only the registers passed to it and to use the
        ; fourth argument as a unique-name suffix - confirm.
        ; ------------------------------------------------------------------
        cmp     r2, #0                  ; nothing to convert?
        moveq   pc, lr                  ; then return before touching the stack

        stmfd   sp!, {r4-r7}
        mov     r3, r0                  ; r3 = write cursor; r0 stays intact

HFXTOFARRAY_LOOP
        ldr     r5, [r1], #4            ; fetch next input word, advance source
        HXF_FXToF r6, r5, r7, ArrayXtoF
        str     r6, [r3], #4            ; store converted word, advance dest
        subs    r2, r2, #1              ; one fewer element remaining
        bne     HFXTOFARRAY_LOOP

        ldmfd   sp!, {r4-r7}
        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: HXFAbsF
; Description:
; Input Arguments:
; Output Argument:
; Prototype in C: float HXFAbsF(float f);
;** ************************************************************************ **
|HXFAbsF| PROC
        ; In:   r0 = 32-bit value
        ; Out:  r0 = the same value with bit 31 cleared
        bic     r0, r0, #0x80000000     ; clear bit 31, keep the other 31 bits
        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: HXFClz
; Description: returns the number of leading zeros
; Input Arguments:
; Output Argument:
; Prototype in C: HUINT32 HClz(HUINT32);
;** ************************************************************************ **
|HXFClz| PROC
; In:  r0 = 32-bit value.   Out: r0 = number of leading zero bits in it.
clz r0, r0 ; count leading zeros in place (CLZ yields 32 for an input of 0)
mov pc, lr ; return the count in r0
ENDP
;** ************************************************************************ **
; Name: HXFSignedClz
; Description: returns the number of leading zeros of a signed number.
; Input Arguments:
; Output Argument:
; Prototype in C: HUINT32 HXFSignedClz(HUINT32);
;** ************************************************************************ **
|HXFSignedClz| PROC
        ; ------------------------------------------------------------------
        ; In:   r0 = signed 32-bit value
        ; Out:  r0 = number of leading zero bits in the magnitude of the
        ;            input (the input is negated first when it is negative)
        ; Uses: r1, flags
        ;
        ; The negation is predicated on MI (N set).  MOVS updates N and Z
        ; but leaves V as the caller left it, so an LT test (N != V) here
        ; would depend on a stale overflow flag.
        ; ------------------------------------------------------------------
        movs    r1, r0                  ; r1 = value; N = sign bit
        rsbmi   r1, r1, #0              ; negative input: r1 = -value
        clz     r0, r1                  ; leading zeros of the magnitude
        mov     pc, lr
        ENDP
;** ************************************************************************ **
; Name: HXFCtz
; Description: returns the number of trailing zeros of an
; unsigned number.
; Input Arguments:
; Output Argument:
; Prototype in C: HUINT32 HCtz(HUINT32);
;** ************************************************************************ **
|HXFCtz| PROC
        ; In:   r0 = v (unsigned 32-bit)
        ; Out:  r0 = 32 - CLZ((v - 1) & ~v), the number of trailing zero
        ;            bits in v (32 when v is 0)
        ; Uses: r1
        sub     r1, r0, #1              ; r1 = v - 1
        bic     r0, r1, r0              ; r0 = (v - 1) & ~v: ones below lowest set bit
        clz     r0, r0                  ; leading zeros above that run of ones
        rsb     r0, r0, #32             ; length of the run = trailing zeros of v
        mov     pc, lr
        ENDP
;** ************************************************************************ **
END
;/* ************************************************************************ *\
;** EOF
;\* ************************************************************************ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -