📄 clip.s
字号:
/*
 * Clip testing in SPARC assembly
 *
 * Syntax: GNU as for SPARC, run through the C preprocessor.
 * Entry points:
 *	_mesa_sparc_cliptest_points4	- clip test + perspective divide
 *	_mesa_sparc_cliptest_points4_np	- clip test only
 */

/*
 * V4F_* are the byte offsets used below to reach fields through the
 * GLvector4f pointers; they must agree with the structure layout
 * declared elsewhere.  LDPTR loads a pointer-sized field.
 *
 * FRAME_SIZE is the stack frame reserved by "save".  The frame has to
 * hold at least the register-window save area (16 registers): 64 bytes
 * with 4-byte registers, 128 bytes with 8-byte registers.  The 64-bit
 * value used here is the 64-bit ABI minimum frame (128 + 6 * 8).
 */
#if __arch64__
#define LDPTR		ldx
#define V4F_DATA	0x00
#define V4F_START	0x08
#define V4F_COUNT	0x10
#define V4F_STRIDE	0x14
#define V4F_SIZE	0x18
#define V4F_FLAGS	0x1c
#define FRAME_SIZE	176
#else
#define LDPTR		ld
#define V4F_DATA	0x00
#define V4F_START	0x04
#define V4F_COUNT	0x08
#define V4F_STRIDE	0x0c
#define V4F_SIZE	0x10
#define V4F_FLAGS	0x14
#define FRAME_SIZE	64
#endif

#define VEC_SIZE_1	1
#define VEC_SIZE_2	3
#define VEC_SIZE_3	7
#define VEC_SIZE_4	15

#if defined(SVR4) || defined(__SVR4) || defined(__svr4__)
	/* Solaris requires this for 64-bit. */
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g7, #scratch
#endif

	.text
	.align		64

one_dot_zero:
	.word		0x3f800000	/* 1.0f */

	/* This trick is shamelessly stolen from the x86
	 * Mesa asm.  Very clever, and we can do it too
	 * since we have the necessary add with carry
	 * instructions on Sparc.
	 *
	 * The table is indexed by a 7-bit value that the loops below
	 * build from the raw IEEE bit patterns of x, y, z, w by
	 * shifting carry bits in, most significant bit first:
	 *
	 *	bit 6: sign(w)
	 *	bit 5: sign(z)		bit 4: |w| < |z|
	 *	bit 3: sign(y)		bit 2: |w| < |y|
	 *	bit 1: sign(x)		bit 0: |w| < |x|
	 *
	 * (magnitudes are compared as unsigned integers after the sign
	 * bit has been shifted out).  Each entry is the clip mask byte
	 * for that combination.  The table must directly follow
	 * one_dot_zero: the code reaches it as one_dot_zero + 4.
	 */
clip_table:
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	32, 33, 32, 34, 36, 37, 36, 38
	.byte	32, 33, 32, 34, 40, 41, 40, 42
	.byte	 0,  1,  0,  2,  4,  5,  4,  6
	.byte	 0,  1,  0,  2,  8,  9,  8, 10
	.byte	16, 17, 16, 18, 20, 21, 20, 22
	.byte	16, 17, 16, 18, 24, 25, 24, 26
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	47, 45, 47, 46, 39, 37, 39, 38
	.byte	47, 45, 47, 46, 43, 41, 43, 42
	.byte	63, 61, 63, 62, 55, 53, 55, 54
	.byte	63, 61, 63, 62, 59, 57, 59, 58
	.byte	31, 29, 31, 30, 23, 21, 23, 22
	.byte	31, 29, 31, 30, 27, 25, 27, 26

	.align		64

	/* Helper for position-independent addressing: "call" leaves
	 * the address of the call instruction in %o7, and this routine
	 * simply returns, so the caller can derive addresses of nearby
	 * data from %o7.
	 */
__pc_tramp:
	retl
	 nop

/*
 * _mesa_sparc_cliptest_points4
 *
 * Arguments (as seen after "save"):
 *	%i0	GLvector4f *clip_vec	source vertices
 *	%i1	GLvector4f *proj_vec	destination for projected vertices
 *	%i2	GLubyte clipMask[]	one mask byte written per vertex
 *	%i3	GLubyte *orMask		in/out: OR of all vertex masks
 *	%i4	GLubyte *andMask	in/out: AND of all vertex masks
 * Returns:
 *	%o0	proj_vec
 *
 * For every vertex the clip mask is looked up in clip_table and stored
 * to clipMask[i].  A vertex with a non-zero mask gets (0, 0, 0, 1.0)
 * written to proj_vec; a vertex with a zero mask gets
 * (x/w, y/w, z/w, 1/w).  proj_vec flags, size and count are updated
 * before the loop.  *orMask accumulates every non-zero mask; *andMask
 * is the AND of all masks when every vertex had a non-zero mask and 0
 * otherwise.  With count == 0 the loop is skipped and both masks are
 * written back unchanged.
 *
 * Clobbers: %g1-%g5, %g7, %f0-%f4, %f8, integer condition codes.
 */
	.globl		_mesa_sparc_cliptest_points4
_mesa_sparc_cliptest_points4:
	save		%sp, -FRAME_SIZE, %sp
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1	! g1 = address of one_dot_zero
	ld		[%g1 + 0x0], %f4			! f4 = 1.0f
	add		%g1, 0x4, %g1				! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %g7
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2				! g2 = (andMask << 8) | orMask

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%g7, [%i1 + V4F_COUNT]
	clr		%l2
	clr		%l0

	/* The loop below tests its counter at the bottom, so it must
	 * not be entered with count == 0: the counter would already be
	 * past the limit on the first compare.
	 */
	cmp		%g7, 0
	be		9f
	 nop

	/* l0:	i
	 * g7:	count
	 * l1:	stride
	 * l2:	c  (number of vertices with a non-zero mask)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 * i5:	vProj[4][i]
	 *
	 * g5 holds the bits of w shifted left by one (sign removed);
	 * g4 is reused for z, y and x in turn.  Each addcc shifts a
	 * sign bit into the carry, each subcc leaves "|w| < |coord|"
	 * in the carry, and each addx shifts the carry into g3.  The
	 * loads placed between subcc and addx do not touch the
	 * condition codes.
	 */
1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	fdivs		%f4, %f3, %f8		! FGM		f8 = 1.0 / w
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group	g3 = mask
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU		delay slot: always stores the mask

	/* mask != 0: count it, fold it into both accumulated masks and
	 * emit (0, 0, 0, 1.0).  g4 = (mask << 8) | 0xff lets a single
	 * "and" update the AND byte while keeping the OR byte intact.
	 */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	st		%g0, [%i5 + 0x00]	! LSU
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	st		%g0, [%i5 + 0x04]	! LSU
	and		%g2, %g4, %g2		! IEU0	Group
	st		%g0, [%i5 + 0x08]	! LSU
	b		3f			! CTI
	 st		%f4, [%i5 + 0x0c]	! LSU	Group

	/* mask == 0: emit (x, y, z) * (1/w) and 1/w. */
2:	ld		[%i0 + 0x00], %f0	! LSU	Group
	ld		[%i0 + 0x04], %f1	! LSU	Group
	ld		[%i0 + 0x08], %f2	! LSU	Group
	fmuls		%f0, %f8, %f0		! FGM
	st		%f0, [%i5 + 0x00]	! LSU	Group
	fmuls		%f1, %f8, %f1		! FGM
	st		%f1, [%i5 + 0x04]	! LSU	Group
	fmuls		%f2, %f8, %f2		! FGM
	st		%f2, [%i5 + 0x08]	! LSU	Group
	st		%f8, [%i5 + 0x0c]	! LSU	Group

3:	add		%i5, 0x10, %i5		! IEU1
	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %g7		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group

	/* Write back the masks.  The annulled branch executes its
	 * delay slot only when taken, so the AND mask is cleared only
	 * when c < count.
	 */
9:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %g7		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0

/*
 * _mesa_sparc_cliptest_points4_np
 *
 * Same arguments, return value and mask handling as
 * _mesa_sparc_cliptest_points4, but no vertex data is written to
 * proj_vec: only its flags, size and count fields are updated.
 * With count == 0 the loop is skipped and both masks are written back
 * unchanged.
 *
 * Clobbers: %g1-%g5, %g7, integer condition codes.
 */
	.globl		_mesa_sparc_cliptest_points4_np
_mesa_sparc_cliptest_points4_np:
	save		%sp, -FRAME_SIZE, %sp
	call		__pc_tramp
	 sub		%o7, (. - one_dot_zero - 4), %g1	! g1 = address of one_dot_zero
	add		%g1, 0x4, %g1				! g1 = clip_table

	ld		[%i0 + V4F_STRIDE], %l1
	ld		[%i0 + V4F_COUNT], %g7
	LDPTR		[%i0 + V4F_START], %i0
	LDPTR		[%i1 + V4F_START], %i5			! loaded but not used below
	ldub		[%i3], %g2
	ldub		[%i4], %g3
	sll		%g3, 8, %g3
	or		%g2, %g3, %g2				! g2 = (andMask << 8) | orMask

	ld		[%i1 + V4F_FLAGS], %g3
	or		%g3, VEC_SIZE_4, %g3
	st		%g3, [%i1 + V4F_FLAGS]
	mov		3, %g3
	st		%g3, [%i1 + V4F_SIZE]
	st		%g7, [%i1 + V4F_COUNT]
	clr		%l2
	clr		%l0

	/* Bottom-tested loop: skip it entirely when count == 0. */
	cmp		%g7, 0
	be		9f
	 nop

	/* l0:	i
	 * g7:	count
	 * l1:	stride
	 * l2:	c  (number of vertices with a non-zero mask)
	 * g2:	(tmpAndMask << 8) | tmpOrMask
	 * g1:	clip_table
	 * i0:	from[stride][i]
	 * i2:	clipMask
	 *
	 * The table index is built in g3 exactly as in
	 * _mesa_sparc_cliptest_points4.
	 */
1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
	ld		[%i0 + 0x08], %g4	! LSU	Group
	addcc		%g5, %g5, %g5		! IEU1	Group
	addx		%g0, 0x0, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x04], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	ld		[%i0 + 0x00], %g4	! LSU	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	addcc		%g4, %g4, %g4		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	subcc		%g5, %g4, %g0		! IEU1	Group
	addx		%g3, %g3, %g3		! IEU1	Group
	ldub		[%g1 + %g3], %g3	! LSU	Group	g3 = mask
	cmp		%g3, 0			! IEU1	Group, stall
	be		2f			! CTI
	 stb		%g3, [%i2]		! LSU		delay slot: always stores the mask

	/* mask != 0: count it and fold it into both accumulated masks. */
	sll		%g3, 8, %g4		! IEU1	Group
	add		%l2, 1, %l2		! IEU0
	or		%g4, 0xff, %g4		! IEU0	Group
	or		%g2, %g3, %g2		! IEU1
	and		%g2, %g4, %g2		! IEU0	Group

2:	add		%l0, 1, %l0		! IEU0	Group
	add		%i2, 1, %i2		! IEU0	Group
	cmp		%l0, %g7		! IEU1	Group
	bne		1b			! CTI
	 add		%i0, %l1, %i0		! IEU0	Group

	/* Write back the masks; the AND mask is cleared only when
	 * c < count (annulled delay slot).
	 */
9:	stb		%g2, [%i3]		! LSU
	srl		%g2, 8, %g3		! IEU0	Group
	cmp		%l2, %g7		! IEU1	Group
	bl,a		1f			! CTI
	 clr		%g3			! IEU0
1:	stb		%g3, [%i4]		! LSU	Group
	ret					! CTI	Group
	 restore	%i1, 0x0, %o0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -