📄 viscsum.s
字号:
1: addcc %g3, %o2, %o2 /* IEU1 Group */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU0 */1: addcc %g5, %o2, %o2 /* IEU1 Group */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU0 */1: addcc %g7, %o2, %o2 /* IEU1 Group */ bcs,a,pn %xcc, 3f /* CTI */ add %o2, 1, %o2 /* IEU0 */3: cmp %o1, 0xc0 /* IEU1 Group */ blu,pn %icc, 20f /* CTI */ sllx %o2, 32, %g5 /* IEU0 */#ifdef __KERNEL__ VISEntry#endif addcc %o2, %g5, %o2 /* IEU1 Group */ sub %o1, 0xc0, %o1 /* IEU0 */ wr %g0, ASI_BLK_P, %asi /* LSU Group */ membar #StoreLoad /* LSU Group */ srlx %o2, 32, %o2 /* IEU0 Group */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU1 */1: andcc %o1, 0x80, %g0 /* IEU1 Group */ bne,pn %icc, 7f /* CTI */ andcc %o1, 0x40, %g0 /* IEU1 Group */ be,pn %icc, 6f /* CTI */ fzero %f12 /* FPA */ fzero %f14 /* FPA Group */ ldda [%o0 + 0x000] %asi, %f16 ldda [%o0 + 0x040] %asi, %f32 ldda [%o0 + 0x080] %asi, %f48 START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26) ba,a,pt %xcc, 3f6: sub %o0, 0x40, %o0 /* IEU0 Group */ fzero %f28 /* FPA */ fzero %f30 /* FPA Group */ ldda [%o0 + 0x040] %asi, %f32 ldda [%o0 + 0x080] %asi, %f48 ldda [%o0 + 0x0c0] %asi, %f0 START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42) ba,a,pt %xcc, 4f7: bne,pt %icc, 8f /* CTI */ fzero %f44 /* FPA */ add %o0, 0x40, %o0 /* IEU0 Group */ fzero %f60 /* FPA */ fzero %f62 /* FPA Group */ ldda [%o0 - 0x040] %asi, %f0 ldda [%o0 + 0x000] %asi, %f16 ldda [%o0 + 0x040] %asi, %f32 START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10) ba,a,pt %xcc, 2f8: add %o0, 0x80, %o0 /* IEU0 Group */ fzero %f46 /* FPA */ ldda [%o0 - 0x080] %asi, %f48 ldda [%o0 - 0x040] %asi, %f0 ldda [%o0 + 0x000] %asi, %f16 START_THE_TRICK(f44,f48,f50,f52,f54,f56,f58)1: DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14) ldda [%o0 + 0x040] %asi, %f322: DO_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30) ldda [%o0 + 0x080] %asi, %f483: DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46) ldda [%o0 + 0x0c0] %asi, %f04: DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62) add %o0, 0x100, %o0 /* IEU0 Group */ subcc %o1, 0x100, %o1 /* IEU1 */ bgeu,a,pt %icc, 1b /* CTI */ ldda [%o0 + 0x000] %asi, %f16 membar #Sync /* LSU Group */ DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14) END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)#ifdef __KERNEL__ ldub [%g6 + TI_CURRENT_DS], %g7#endif and %o1, 0x3f, %o1 /* IEU0 Group */#ifdef __KERNEL__ VISExit wr %g7, %g0, %asi#endif20: andcc %o1, 0xf0, %g1 /* IEU1 Group */ be,pn %icc, 23f /* CTI */ and %o1, 0xf, %o3 /* IEU0 */#ifdef __KERNEL__22: sll %g1, 1, %o4 /* IEU0 Group */ sethi %hi(23f), %g7 /* IEU1 */ sub %g7, %o4, %g7 /* IEU0 Group */ jmpl %g7 + %lo(23f), %g0 /* CTI Group brk forced*/ add %o0, %g1, %o0 /* IEU0 */#else22: rd %pc, %g7 /* LSU Group+4bubbles */ sll %g1, 1, %o4 /* IEU0 Group */ sub %g7, %o4, %g7 /* IEU0 Group (regdep) */ jmpl %g7 + (23f - 22b), %g0 /* CTI Group brk forced*/ add %o0, %g1, %o0 /* IEU0 */#endif CSUM_LASTCHUNK(0xe0) CSUM_LASTCHUNK(0xd0) CSUM_LASTCHUNK(0xc0) CSUM_LASTCHUNK(0xb0) CSUM_LASTCHUNK(0xa0) CSUM_LASTCHUNK(0x90) CSUM_LASTCHUNK(0x80) CSUM_LASTCHUNK(0x70) CSUM_LASTCHUNK(0x60) CSUM_LASTCHUNK(0x50) CSUM_LASTCHUNK(0x40) CSUM_LASTCHUNK(0x30) CSUM_LASTCHUNK(0x20) CSUM_LASTCHUNK(0x10) CSUM_LASTCHUNK(0x00)23: brnz,pn %o3, 26f /* CTI+IEU1 Group */24: sllx %o2, 32, %g1 /* IEU0 */25: addcc %o2, %g1, %o0 /* IEU1 Group */ srlx %o0, 32, %o0 /* IEU0 Group (regdep) */ bcs,a,pn %xcc, 1f /* CTI */ add %o0, 1, %o0 /* IEU1 */1: retl /* CTI Group brk forced*/ srl %o0, 0, %o0 /* IEU0 */26: andcc %o1, 8, %g0 /* IEU1 Group */ be,pn %icc, 1f /* CTI */ ldx [%o0], %g3 /* Load */ add %o0, 8, %o0 /* IEU0 Group */ addcc %g3, %o2, %o2 /* IEU1 Group */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU0 */1: andcc %o1, 4, %g0 /* IEU1 Group */ be,a,pn %icc, 1f /* CTI */ clr %g2 /* IEU0 */ ld [%o0], %g2 /* Load */ add %o0, 4, %o0 /* IEU0 Group */ sllx %g2, 32, %g2 /* IEU0 Group */1: andcc %o1, 2, %g0 /* IEU1 */ be,a,pn %icc, 1f /* CTI */ clr %o4 /* IEU0 Group */ lduh [%o0], %o4 /* Load */ add %o0, 2, %o0 /* IEU1 */ sll %o4, 16, %o4 /* IEU0 Group */1: andcc %o1, 1, %g0 /* IEU1 */ be,a,pn %icc, 1f /* CTI */ clr %o5 /* IEU0 Group */ ldub [%o0], %o5 /* Load */ sll %o5, 8, %o5 /* IEU0 Group */1: or %g2, %o4, %o4 /* IEU1 */ or %o5, %o4, %o4 /* IEU0 Group (regdep) */ addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU0 */1: ba,pt %xcc, 25b /* CTI Group */ sllx %o2, 32, %g1 /* IEU0 */21: srl %o2, 0, %o2 /* IEU0 Group */ cmp %o1, 0 /* IEU1 */ be,pn %icc, 24b /* CTI */ andcc %o1, 4, %g0 /* IEU1 Group */ be,a,pn %icc, 1f /* CTI */ clr %g2 /* IEU0 */ lduh [%o0], %g3 /* Load */ lduh [%o0+2], %g2 /* Load Group */ add %o0, 4, %o0 /* IEU0 Group */ sllx %g3, 48, %g3 /* IEU0 Group */ sllx %g2, 32, %g2 /* IEU0 Group */ or %g3, %g2, %g2 /* IEU0 Group */1: andcc %o1, 2, %g0 /* IEU1 */ be,a,pn %icc, 1f /* CTI */ clr %o4 /* IEU0 Group */ lduh [%o0], %o4 /* Load */ add %o0, 2, %o0 /* IEU1 */ sll %o4, 16, %o4 /* IEU0 Group */1: andcc %o1, 1, %g0 /* IEU1 */ be,a,pn %icc, 1f /* CTI */ clr %o5 /* IEU0 Group */ ldub [%o0], %o5 /* Load */ sll %o5, 8, %o5 /* IEU0 Group */1: or %g2, %o4, %o4 /* IEU1 */ or %o5, %o4, %o4 /* IEU0 Group (regdep) */ addcc %o4, %o2, %o2 /* IEU1 Group (regdep) */ bcs,a,pn %xcc, 1f /* CTI */ add %o2, 1, %o2 /* IEU0 */1: ba,pt %xcc, 25b /* CTI Group */ sllx %o2, 32, %g1 /* IEU0 */ /* When buff is byte aligned and len is large, we backoff to * this really slow handling. The issue is that we cannot do * the VIS stuff when buff is byte aligned as unaligned.c will * not fix it up. */csump_really_slow: mov %o0, %o3 mov %o1, %o4 cmp %o1, 0 ble,pn %icc, 9f mov 0, %o0 andcc %o3, 1, %o5 be,pt %icc, 1f sra %o4, 1, %g3 add %o1, -1, %o4 ldub [%o3], %o0 add %o3, 1, %o3 sra %o4, 1, %g31: cmp %g3, 0 be,pt %icc, 3f and %o4, 1, %g2 and %o3, 2, %g2 brz,a,pt %g2, 1f sra %g3, 1, %g3 add %g3, -1, %g3 add %o4, -2, %o4 lduh [%o3], %g2 add %o3, 2, %o3 add %o0, %g2, %o0 sra %g3, 1, %g31: cmp %g3, 0 be,pt %icc, 2f and %o4, 2, %g21: ld [%o3], %g2 addcc %o0, %g2, %o0 addx %o0, %g0, %o0 addcc %g3, -1, %g3 bne,pt %icc, 1b add %o3, 4, %o3 srl %o0, 16, %o1 sethi %hi(64512), %g2 or %g2, 1023, %g2 and %o0, %g2, %g3 add %g3, %o1, %g3 srl %g3, 16, %o0 and %g3, %g2, %g2 add %g2, %o0, %g3 sll %g3, 16, %g3 srl %g3, 16, %o0 and %o4, 2, %g22: cmp %g2, 0 be,pt %icc, 3f and %o4, 1, %g2 lduh [%o3], %g2 add %o3, 2, %o3 add %o0, %g2, %o0 and %o4, 1, %g23: cmp %g2, 0 be,pt %icc, 1f srl %o0, 16, %o1 ldub [%o3], %g2 sll %g2, 8, %g2 add %o0, %g2, %o0 srl %o0, 16, %o11: sethi %hi(64512), %g2 or %g2, 1023, %g2 cmp %o5, 0 and %o0, %g2, %g3 add %g3, %o1, %g3 srl %g3, 16, %o0 and %g3, %g2, %g2 add %g2, %o0, %g3 sll %g3, 16, %g3 srl %g3, 16, %o0 srl %g3, 24, %g3 and %o0, 255, %g2 sll %g2, 8, %g2 bne,pt %icc, 1f or %g3, %g2, %g29: mov %o0, %g21: addcc %g2, %o2, %g2 addx %g2, %g0, %g2 retl srl %g2, 0, %o0
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -