/* xform.s  (source-viewer page header; not part of the assembly) */
/* $Id: xform.S,v 1.4 2005/07/28 00:11:11 idr Exp $ */
/* TODO
*
* 1) It would be nice if load/store double could be used
* at least for the matrix parts. I think for the matrices
* it is safe, but for the vertices it probably is not due to
* things like glInterleavedArrays etc.
*
* UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
*
* 2) One extremely slick trick would be if we could enclose
* groups of xform calls on the same vertices such that
* we just load the matrix into f16-->f31 before the calls
* and then we would not have to do them here. This may be
* tricky and not much of a gain though.
*/
#include "sparc_matrix.h"
#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
/* Solaris requires this for 64-bit. */
.register %g2, #scratch
.register %g3, #scratch
#endif
.text
.align 64
/*
 * __set_v4f_1: store 1 into the V4F_SIZE field of the descriptor at %o0
 * and OR VEC_SIZE_1 into its V4F_FLAGS word.
 * Scratch: %g1 (size value), %g2 (flag word).  Returns with retl.
 */
__set_v4f_1:
	mov	1, %g1				! size value to record
	ld	[%o0 + V4F_FLAGS], %g2		! fetch current flags
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_1, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_2: store 2 into the V4F_SIZE field of the descriptor at %o0
 * and OR VEC_SIZE_2 into its V4F_FLAGS word.
 * Scratch: %g1 (size value), %g2 (flag word).  Returns with retl.
 */
__set_v4f_2:
	mov	2, %g1				! size value to record
	ld	[%o0 + V4F_FLAGS], %g2		! fetch current flags
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_2, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_3: store 3 into the V4F_SIZE field of the descriptor at %o0
 * and OR VEC_SIZE_3 into its V4F_FLAGS word.
 * Scratch: %g1 (size value), %g2 (flag word).  Returns with retl.
 */
__set_v4f_3:
	mov	3, %g1				! size value to record
	ld	[%o0 + V4F_FLAGS], %g2		! fetch current flags
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_3, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/*
 * __set_v4f_4: store 4 into the V4F_SIZE field of the descriptor at %o0
 * and OR VEC_SIZE_4 into its V4F_FLAGS word.
 * Scratch: %g1 (size value), %g2 (flag word).  Returns with retl.
 */
__set_v4f_4:
	mov	4, %g1				! size value to record
	ld	[%o0 + V4F_FLAGS], %g2		! fetch current flags
	st	%g1, [%o0 + V4F_SIZE]
	or	%g2, VEC_SIZE_4, %g2
	retl
	 st	%g2, [%o0 + V4F_FLAGS]		! delay slot: write flags back
/* First the raw versions. */
/*
 * _mesa_sparc_transform_points1_general
 *
 * For every source element x (one float read per element) store four
 * floats to the destination:
 *     x*M0 + M12,  x*M1 + M13,  x*M2 + M14,  x*M3 + M15
 * then tail-branch to __set_v4f_4.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * M0..M15 are names supplied by sparc_matrix.h (presumably FP registers
 * filled by the LDMATRIX_* macro -- confirm).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_general
_mesa_sparc_transform_points1_general:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_1_2_3_12_13_14_15(%o1)
/* The source count is copied to the destination descriptor up front. */
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the single-element tail at 2: runs.  The andn in the
 * delay slot executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f8 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
fmuls %f0, M1, %f2 ! FGM Group
fmuls %f0, M2, %f3 ! FGM Group
fmuls %f0, M3, %f4 ! FGM Group
fmuls %f8, M0, %f9 ! FGM Group f1 available
fadds %f1, M12, %f1 ! FGA
st %f1, [%g2 + 0x00] ! LSU
fmuls %f8, M1, %f10 ! FGM Group f2 available
fadds %f2, M13, %f2 ! FGA
st %f2, [%g2 + 0x04] ! LSU
fmuls %f8, M2, %f11 ! FGM Group f3 available
fadds %f3, M14, %f3 ! FGA
st %f3, [%g2 + 0x08] ! LSU
fmuls %f8, M3, %f12 ! FGM Group f4 available
fadds %f4, M15, %f4 ! FGA
st %f4, [%g2 + 0x0c] ! LSU
fadds %f9, M12, %f9 ! FGA Group f9 available
st %f9, [%g2 + 0x10] ! LSU
fadds %f10, M13, %f10 ! FGA Group f10 available
st %f10, [%g2 + 0x14] ! LSU
fadds %f11, M14, %f11 ! FGA Group f11 available
st %f11, [%g2 + 0x18] ! LSU
fadds %f12, M15, %f12 ! FGA Group f12 available
st %f12, [%g2 + 0x1c] ! LSU
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: everything is done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0 ! LSU Group
fmuls %f0, M0, %f1 ! FGM Group 1-cycle stall on %f0
fmuls %f0, M1, %f2 ! FGM Group
fmuls %f0, M2, %f3 ! FGM Group
fmuls %f0, M3, %f4 ! FGM Group
fadds %f1, M12, %f1 ! FGA Group
st %f1, [%g2 + 0x00] ! LSU
fadds %f2, M13, %f2 ! FGA Group
st %f2, [%g2 + 0x04] ! LSU
fadds %f3, M14, %f3 ! FGA Group
st %f3, [%g2 + 0x08] ! LSU
fadds %f4, M15, %f4 ! FGA Group
st %f4, [%g2 + 0x0c] ! LSU
/* Exit: __set_v4f_4 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_4
nop
/*
 * _mesa_sparc_transform_points1_identity
 *
 * Copies the first float of every source element, unchanged, to offset 0
 * of the corresponding destination element, then tail-branches to
 * __set_v4f_1.  No matrix is loaded; %o1 is only used as a counter.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes, %o1 = elements processed so far,
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 *
 * If %o0 == %o2 the routine returns at 4: without writing the count,
 * size or flags fields.
 */
.globl _mesa_sparc_transform_points1_identity
_mesa_sparc_transform_points1_identity:
/* Same descriptor for source and destination: nothing to copy.  The
 * stride load sits in the delay slot and executes either way. */
cmp %o0, %o2
be 4f
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): skip the copy.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f1 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
st %f0, [%g2 + 0x00] ! LSU Group
cmp %o1, %o2 ! IEU1
st %f1, [%g2 + 0x10] ! LSU Group
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: copy a single element.  The addx targets %g0, so its result is
 * discarded; presumably it only separates the load from the dependent
 * store -- confirm. */
2: ld [%g1 + 0x00], %f0
addx %g0, %g0, %g0
st %f0, [%g2 + 0x00]
/* Exit: __set_v4f_1 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_1
nop
/* Early-out for the aliased case. */
4: retl
nop
/*
 * _mesa_sparc_transform_points1_2d
 *
 * For every source element x (one float read per element) store two
 * floats to the destination element:
 *     offset 0x00: x*M0 + M12
 *     offset 0x04: x*M1 + M13
 * Offsets 0x08 and 0x0c of each destination element are not written.
 * Finishes by tail-branching to __set_v4f_2.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_2d
_mesa_sparc_transform_points1_2d:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_1_12_13(%o1)
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f8 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group
fmuls %f0, M1, %f2 ! FGM Group
fmuls %f8, M0, %f9 ! FGM Group
fmuls %f8, M1, %f10 ! FGM Group
fadds %f1, M12, %f3 ! FGA Group f1 available
st %f3, [%g2 + 0x00] ! LSU
fadds %f2, M13, %f4 ! FGA Group f2 available
st %f4, [%g2 + 0x04] ! LSU
fadds %f9, M12, %f11 ! FGA Group f9 available
st %f11, [%g2 + 0x10] ! LSU
fadds %f10, M13, %f12 ! FGA Group f10 available
st %f12, [%g2 + 0x14] ! LSU
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0
fmuls %f0, M0, %f1
fmuls %f0, M1, %f2
fadds %f1, M12, %f3
st %f3, [%g2 + 0x00]
fadds %f2, M13, %f4
st %f4, [%g2 + 0x04]
/* Exit: __set_v4f_2 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_2
nop
/*
 * _mesa_sparc_transform_points1_2d_no_rot
 *
 * For every source element x (one float read per element) store two
 * floats to the destination element:
 *     offset 0x00: x*M0 + M12
 *     offset 0x04: M13 (stored as-is, independent of the input)
 * Offsets 0x08 and 0x0c of each destination element are not written.
 * Finishes by tail-branching to __set_v4f_2.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_2d_no_rot
_mesa_sparc_transform_points1_2d_no_rot:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_12_13(%o1)
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f4 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group
fmuls %f4, M0, %f5 ! FGM Group
fadds %f1, M12, %f3 ! FGA Group, 2 cycle stall, f1 available
st %f3, [%g2 + 0x00] ! LSU
st M13, [%g2 + 0x04] ! LSU Group, f5 available
fadds %f5, M12, %f6 ! FGA
st %f6, [%g2 + 0x10] ! LSU Group
st M13, [%g2 + 0x14] ! LSU Group
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0
fmuls %f0, M0, %f1
fadds %f1, M12, %f3
st %f3, [%g2 + 0x00]
st M13, [%g2 + 0x04]
/* Exit: __set_v4f_2 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_2
nop
/*
 * _mesa_sparc_transform_points1_3d
 *
 * For every source element x (one float read per element) store three
 * floats to the destination element:
 *     offset 0x00: x*M0 + M12
 *     offset 0x04: x*M1 + M13
 *     offset 0x08: x*M2 + M14
 * Offset 0x0c of each destination element is not written.
 * Finishes by tail-branching to __set_v4f_3.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_3d
_mesa_sparc_transform_points1_3d:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_1_2_12_13_14(%o1)
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f4 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group
fmuls %f0, M1, %f2 ! FGM Group
fmuls %f0, M2, %f3 ! FGM Group
fmuls %f4, M0, %f5 ! FGM Group
fadds %f1, M12, %f1 ! FGA Group, f1 available
st %f1, [%g2 + 0x00] ! LSU
fmuls %f4, M1, %f6 ! FGM
fadds %f2, M13, %f2 ! FGA Group, f2 available
st %f2, [%g2 + 0x04] ! LSU
fmuls %f4, M2, %f7 ! FGM
fadds %f3, M14, %f3 ! FGA Group, f3 available
st %f3, [%g2 + 0x08] ! LSU
fadds %f5, M12, %f5 ! FGA Group, f5 available
st %f5, [%g2 + 0x10] ! LSU
fadds %f6, M13, %f6 ! FGA Group, f6 available
st %f6, [%g2 + 0x14] ! LSU
fadds %f7, M14, %f7 ! FGA Group, f7 available
st %f7, [%g2 + 0x18] ! LSU
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0
fmuls %f0, M0, %f1
fmuls %f0, M1, %f2
fmuls %f0, M2, %f3
fadds %f1, M12, %f1
st %f1, [%g2 + 0x00]
fadds %f2, M13, %f2
st %f2, [%g2 + 0x04]
fadds %f3, M14, %f3
st %f3, [%g2 + 0x08]
/* Exit: __set_v4f_3 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_3
nop
/*
 * _mesa_sparc_transform_points1_3d_no_rot
 *
 * For every source element x (one float read per element) store three
 * floats to the destination element:
 *     offset 0x00: x*M0 + M12
 *     offset 0x04: M13 (stored as-is, independent of the input)
 *     offset 0x08: M14 (stored as-is, independent of the input)
 * Offset 0x0c of each destination element is not written.
 * Finishes by tail-branching to __set_v4f_3.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_3d_no_rot
_mesa_sparc_transform_points1_3d_no_rot:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_12_13_14(%o1)
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f2 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group
fmuls %f2, M0, %f3 ! FGM Group
fadds %f1, M12, %f1 ! FGA Group, 2 cycle stall, f1 available
st %f1, [%g2 + 0x00] ! LSU
fadds %f3, M12, %f3 ! FGA Group, f3 available
st M13, [%g2 + 0x04] ! LSU
st M14, [%g2 + 0x08] ! LSU Group
st %f3, [%g2 + 0x10] ! LSU Group
st M13, [%g2 + 0x14] ! LSU Group
st M14, [%g2 + 0x18] ! LSU Group
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0
fmuls %f0, M0, %f1
fadds %f1, M12, %f1
st %f1, [%g2 + 0x00]
st M13, [%g2 + 0x04]
st M14, [%g2 + 0x08]
/* Exit: __set_v4f_3 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_3
nop
/*
 * _mesa_sparc_transform_points1_perspective
 *
 * For every source element x (one float read per element) store four
 * words to the destination element:
 *     offset 0x00: x*M0
 *     offset 0x04: zero word (stored from %g0)
 *     offset 0x08: M14 (stored as-is, independent of the input)
 *     offset 0x0c: zero word (stored from %g0)
 * Finishes by tail-branching to __set_v4f_4.
 *
 * In:   %o0 = destination descriptor (V4F_START read, V4F_COUNT written)
 *       %o1 = base address handed to the LDMATRIX_* macro
 *             (presumably the 16-float matrix -- confirm in sparc_matrix.h)
 *       %o2 = source descriptor (V4F_STRIDE, V4F_START, V4F_COUNT read)
 * Regs: %g1 = source cursor, %g2 = destination cursor, %g3 = element count,
 *       %o5 = source stride in bytes,
 *       %o1 = elements processed so far (reused after the matrix load),
 *       %o2 = count with bit 0 cleared (reused after the descriptor loads).
 * Destination elements are laid out 0x10 bytes apart.
 */
.globl _mesa_sparc_transform_points1_perspective
_mesa_sparc_transform_points1_perspective:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_14(%o1)
cmp %g3, 1
st %g3, [%o0 + V4F_COUNT]
/* count < 1 (signed): nothing to do.  The clr is in the delay slot. */
bl 3f
clr %o1
/* count == 1: only the tail at 2: runs.  The andn in the delay slot
 * executes either way and yields the even part of the count. */
be 2f
andn %g3, 1, %o2
/* Main loop: two elements per iteration, until %o1 reaches %o2. */
1: ld [%g1 + 0x00], %f0 ! LSU Group
add %g1, %o5, %g1 ! IEU0
ld [%g1 + 0x00], %f2 ! LSU Group
add %o1, 2, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f1 ! FGM Group
st %f1, [%g2 + 0x00] ! LSU
fmuls %f2, M0, %f3 ! FGM Group
st %g0, [%g2 + 0x04] ! LSU
st M14, [%g2 + 0x08] ! LSU Group
st %g0, [%g2 + 0x0c] ! LSU Group
st %f3, [%g2 + 0x10] ! LSU Group
st %g0, [%g2 + 0x14] ! LSU Group
st M14, [%g2 + 0x18] ! LSU Group
st %g0, [%g2 + 0x1c] ! LSU Group
cmp %o1, %o2 ! IEU1
bne 1b ! CTI
add %g2, 0x20, %g2 ! IEU0 Group
/* Even count: done.  Odd count: one element remains. */
cmp %o1, %g3
be 3f
nop
/* Tail: transform a single element. */
2: ld [%g1 + 0x00], %f0
fmuls %f0, M0, %f1
st %f1, [%g2 + 0x00]
st %g0, [%g2 + 0x04]
st M14, [%g2 + 0x08]
st %g0, [%g2 + 0x0c]
/* Exit: __set_v4f_4 updates size/flags on %o0 and returns to the caller. */
3:
ba __set_v4f_4
nop
.globl _mesa_sparc_transform_points2_general
_mesa_sparc_transform_points2_general:
ld [%o2 + V4F_STRIDE], %o5
LDPTR [%o2 + V4F_START], %g1
LDPTR [%o0 + V4F_START], %g2
ld [%o2 + V4F_COUNT], %g3
LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)
cmp %g3, 0
st %g3, [%o0 + V4F_COUNT]
be 2f
clr %o1
1: ld [%g1 + 0x00], %f0 ! LSU Group
ld [%g1 + 0x04], %f1 ! LSU Group
add %o1, 1, %o1 ! IEU0
add %g1, %o5, %g1 ! IEU1
fmuls %f0, M0, %f2 ! FGM Group
fmuls %f0, M1, %f3 ! FGM Group
fmuls %f0, M2, %f4 ! FGM Group
fmuls %f0, M3, %f5 ! FGM Group
fadds %f2, M12, %f2 ! FGA Group f2 available
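/*
 * Source-viewer page footer (not part of the assembly) -- keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */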