📄 viscsumcopy.s
字号:
,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e3)

/*
 * NOTE(review): %src, %dst, %len, %sum and %x3..%x8 are not plain SPARC
 * register names, and DO_THE_TRICK, END_THE_TRICK1, END_THE_TRICK2, LDBLK,
 * STBLK, ST, SYNC and VISExit are not instructions; all of them are
 * presumably preprocessor aliases/macros defined above this excerpt.
 * Nothing below documents what those macros expand to.
 */

/*
 * vis3e2: exit stage of the preceding variant (its loop head is above this
 * excerpt).  Passes SYNC, a block store followed by four ST() stores at
 * offsets 64..88, then advances %dst by 96, adds 192 - 5*8 to %len and
 * branches to e1.
 */
vis3e2:	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f58,f60,f62,f48,f50,f52,f54,f56,f16,
			,SYNC,		,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),
			,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e1)

/*
 * vis4s: set-up code that falls through into the vis4 loop.
 * Writes %g2 ^ ASI_BLK_XOR to %asi, biases %src by 128 - 32 and %dst by -64,
 * and loads from [%src-128] and [%src-64] starting at %f0 and %f16.
 * %f2/%f4/%f6 are cleared with x - x, %x4 is cleared, and %x5..%x8 receive
 * the fcmpgt32 results of %f32 against %f8..%f14.
 * NOTE(review): the "128 - 32" bias suggests this is the entry used when the
 * source starts 32 bytes into a 64-byte block -- confirm against the
 * dispatch code, which is not part of this excerpt.
 */
	.align		2048
vis4s:	wr		%g2, ASI_BLK_XOR, %asi			/* LSU Group */
	add		%src, 128 - 32, %src			/* IEU0 Group */
	ldda		[%src-128] %asi, %f0			/* Load Group */
	ldda		[%src-64] %asi, %f16			/* Load Group */
	fmovd		%f0, %f52				/* FPA Group */
	fmovd		%f48, %f0				/* FPA Group */
	sub		%dst, 64, %dst				/* IEU0 */
	fpsub32		%f2, %f2, %f2				/* FPA Group */
	fpsub32		%f4, %f4, %f4				/* FPA Group */
	fpsub32		%f6, %f6, %f6				/* FPA Group */
	clr		%x4					/* IEU0 */
	fcmpgt32	%f32, %f8, %x5				/* FPM Group */
	faligndata	%f8, %f10, %f48				/* FPA */
	fcmpgt32	%f32, %f10, %x6				/* FPM Group */
	faligndata	%f10, %f12, %f50			/* FPA */
	fcmpgt32	%f32, %f12, %x7				/* FPM Group */
	faligndata	%f12, %f14, %f52			/* FPA */
	fcmpgt32	%f32, %f14, %x8				/* FPM Group */
	fmovd		%f14, %f54				/* FPA */

/*
 * vis4: loop body, written out three times with the register banks rotated
 * (%f0.. -> %f16.. -> %f32..).  Each stage names in LDBLK() the bank that is
 * not among its first sixteen register arguments.  The first two stages
 * branch out on carry set (to vis4e1 / vis4e2); the third branches back to
 * vis4 on carry clear and otherwise falls through to vis4e3.
 */
vis4:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f56,f58,f60,f62,f48,f50,f52,f54,f54,
			,LDBLK(f32),	,,,,STBLK,,,,
			,bcs,pn %icc, vis4e1)
	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f56,f58,f60,f62,f48,f50,f52,f54,f54,
			,LDBLK(f0),	,,,,STBLK,,,,
			,bcs,pn %icc, vis4e2)
	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f56,f58,f60,f62,f48,f50,f52,f54,f54,
			,LDBLK(f16),	,,,,STBLK,,,,
			,bcc,pt %icc, vis4)

/*
 * vis4e3 / vis4e1 / vis4e2: exit stages, one per loop stage.  They pass SYNC
 * where the loop stages pass LDBLK(), add three ST() stores at offsets
 * 64, 72 and 80 after STBLK, then advance %dst by 88, add 192 - 4*8 to %len
 * and branch to e2 / e3 / e1 respectively.
 */
vis4e3:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f56,f58,f60,f62,f48,f50,f52,f54,f32,
			,SYNC,		,,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),
			,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e2)
vis4e1:	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f56,f58,f60,f62,f48,f50,f52,f54,f0,
			,SYNC,		,,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),
			,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e3)
vis4e2:	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f56,f58,f60,f62,f48,f50,f52,f54,f16,
			,SYNC,		,,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),
			,add %dst, 88, %dst; add %len, 192 - 4*8, %len; ba,pt %icc, e1)

/*
 * vis5s: set-up code that falls through into the vis5 loop.
 * Biases %src by 128 - 40, loads %f10/%f12/%f14 from [%src-88], [%src-80]
 * and [%src-72] under the incoming %asi, then writes %g2 ^ ASI_BLK_XOR to
 * %asi for the load at [%src-64] starting at %f16.  %x4 and %x5 are
 * cleared; %x6..%x8 receive fcmpgt32 results for %f10..%f14.
 * NOTE(review): the fmuld/faddd of %f32 with itself into %f2..%f8 look like
 * register clears spread over both FPU pipes, which only holds if %f32 is
 * zero on entry -- confirm against the code above this excerpt.
 */
	.align		2048
vis5s:	add		%src, 128 - 40, %src			/* IEU0 Group */
	ldda		[%src-88] %asi, %f10			/* Load Group */
	ldda		[%src-80] %asi, %f12			/* Load Group */
	ldda		[%src-72] %asi, %f14			/* Load Group */
	wr		%g2, ASI_BLK_XOR, %asi			/* LSU Group */
	ldda		[%src-64] %asi, %f16			/* Load Group */
	fmovd		%f48, %f0				/* FPA Group */
	fmuld		%f32, %f32, %f2				/* FPM */
	clr		%x4					/* IEU0 */
	faddd		%f32, %f32, %f4				/* FPA Group */
	fmuld		%f32, %f32, %f6				/* FPM */
	clr		%x5					/* IEU0 */
	faddd		%f32, %f32, %f8				/* FPA Group */
	fcmpgt32	%f32, %f10, %x6				/* FPM Group */
	sub		%dst, 64, %dst				/* IEU0 */
	faligndata	%f10, %f12, %f48			/* FPA */
	fcmpgt32	%f32, %f12, %x7				/* FPM Group */
	faligndata	%f12, %f14, %f50			/* FPA */
	fcmpgt32	%f32, %f14, %x8				/* FPM Group */
	fmovd		%f14, %f52				/* FPA */

/* vis5: three rotated loop stages; same structure as vis4 with STBLK one slot later. */
vis5:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f54,f56,f58,f60,f62,f48,f50,f52,f52,
			,LDBLK(f32),	,,,,,STBLK,,,
			,bcs,pn %icc, vis5e1)
	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f54,f56,f58,f60,f62,f48,f50,f52,f52,
			,LDBLK(f0),	,,,,,STBLK,,,
			,bcs,pn %icc, vis5e2)
	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f54,f56,f58,f60,f62,f48,f50,f52,f52,
			,LDBLK(f16),	,,,,,STBLK,,,
			,bcc,pt %icc, vis5)

/*
 * vis5e3 / vis5e1 / vis5e2: exit stages.  SYNC instead of LDBLK(), two ST()
 * stores (offsets 64, 72) after STBLK, %dst += 80, %len += 192 - 3*8, then
 * branch to e2 / e3 / e1.
 */
vis5e3:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f54,f56,f58,f60,f62,f48,f50,f52,f32,
			,SYNC,		,,,,,STBLK,ST(f48,64),ST(f50,72),
			,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e2)
vis5e1:	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f54,f56,f58,f60,f62,f48,f50,f52,f0,
			,SYNC,		,,,,,STBLK,ST(f48,64),ST(f50,72),
			,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e3)
vis5e2:	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f54,f56,f58,f60,f62,f48,f50,f52,f16,
			,SYNC,		,,,,,STBLK,ST(f48,64),ST(f50,72),
			,add %dst, 80, %dst; add %len, 192 - 3*8, %len; ba,pt %icc, e1)

/*
 * vis6s: set-up code that falls through into the vis6 loop.
 * Biases %src by 128 - 48, loads %f12/%f14 from [%src-80] and [%src-72]
 * under the incoming %asi, then writes %g2 ^ ASI_BLK_XOR to %asi for the
 * load at [%src-64] starting at %f16.  %x4..%x6 are cleared; %x7/%x8
 * receive fcmpgt32 results for %f12/%f14.
 */
	.align		2048
vis6s:	add		%src, 128 - 48, %src			/* IEU0 Group */
	ldda		[%src-80] %asi, %f12			/* Load Group */
	ldda		[%src-72] %asi, %f14			/* Load Group */
	wr		%g2, ASI_BLK_XOR, %asi			/* LSU Group */
	ldda		[%src-64] %asi, %f16			/* Load Group */
	fmovd		%f48, %f0				/* FPA Group */
	fmuld		%f32, %f32, %f2				/* FPM */
	clr		%x4					/* IEU0 */
	faddd		%f32, %f32, %f4				/* FPA Group */
	fmuld		%f32, %f32, %f6				/* FPM */
	clr		%x5					/* IEU0 */
	faddd		%f32, %f32, %f8				/* FPA Group */
	fmuld		%f32, %f32, %f10			/* FPM */
	clr		%x6					/* IEU0 */
	fcmpgt32	%f32, %f12, %x7				/* FPM Group */
	sub		%dst, 64, %dst				/* IEU0 */
	fcmpgt32	%f32, %f14, %x8				/* FPM Group */
	faligndata	%f12, %f14, %f48			/* FPA */
	fmovd		%f14, %f50				/* FPA Group */

/* vis6: three rotated loop stages; same structure as vis4 with STBLK two slots later. */
vis6:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f52,f54,f56,f58,f60,f62,f48,f50,f50,
			,LDBLK(f32),	,,,,,,STBLK,,
			,bcs,pn %icc, vis6e1)
	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f52,f54,f56,f58,f60,f62,f48,f50,f50,
			,LDBLK(f0),	,,,,,,STBLK,,
			,bcs,pn %icc, vis6e2)
	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f52,f54,f56,f58,f60,f62,f48,f50,f50,
			,LDBLK(f16),	,,,,,,STBLK,,
			,bcc,pt %icc, vis6)

/*
 * vis6e3 / vis6e1 / vis6e2: exit stages.  SYNC instead of LDBLK(), one ST()
 * store (offset 64) after STBLK, %dst += 72, %len += 192 - 2*8, then branch
 * to e2 / e3 / e1.
 */
vis6e3:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f52,f54,f56,f58,f60,f62,f48,f50,f32,
			,SYNC,		,,,,,,STBLK,ST(f48,64),
			,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e2)
vis6e1:	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f52,f54,f56,f58,f60,f62,f48,f50,f0,
			,SYNC,		,,,,,,STBLK,ST(f48,64),
			,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e3)
vis6e2:	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f52,f54,f56,f58,f60,f62,f48,f50,f16,
			,SYNC,		,,,,,,STBLK,ST(f48,64),
			,add %dst, 72, %dst; add %len, 192 - 2*8, %len; ba,pt %icc, e1)

/*
 * vis7s: set-up code that falls through into the vis7 loop.
 * Biases %src by 128 - 56, loads %f14 from [%src-72] under the incoming
 * %asi, then writes %g2 ^ ASI_BLK_XOR to %asi for the load at [%src-64]
 * starting at %f16.  %x4..%x7 are cleared; %x8 receives the fcmpgt32
 * result for %f14.
 */
	.align		2048
vis7s:	add		%src, 128 - 56, %src			/* IEU0 Group */
	ldda		[%src-72] %asi, %f14			/* Load Group */
	wr		%g2, ASI_BLK_XOR, %asi			/* LSU Group */
	ldda		[%src-64] %asi, %f16			/* Load Group */
	fmovd		%f48, %f0				/* FPA Group */
	fmuld		%f32, %f32, %f2				/* FPM */
	clr		%x4					/* IEU0 */
	faddd		%f32, %f32, %f4				/* FPA Group */
	fmuld		%f32, %f32, %f6				/* FPM */
	clr		%x5					/* IEU0 */
	faddd		%f32, %f32, %f8				/* FPA Group */
	fmuld		%f32, %f32, %f10			/* FPM */
	clr		%x6					/* IEU0 */
	faddd		%f32, %f32, %f12			/* FPA Group */
	clr		%x7					/* IEU0 */
	fcmpgt32	%f32, %f14, %x8				/* FPM Group */
	sub		%dst, 64, %dst				/* IEU0 */
	fmovd		%f14, %f48				/* FPA */

/* vis7: three rotated loop stages; same structure as vis4 with STBLK in the last slot. */
vis7:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f50,f52,f54,f56,f58,f60,f62,f48,f48,
			,LDBLK(f32),	,,,,,,,STBLK,
			,bcs,pn %icc, vis7e1)
	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f50,f52,f54,f56,f58,f60,f62,f48,f48,
			,LDBLK(f0),	,,,,,,,STBLK,
			,bcs,pn %icc, vis7e2)
	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f50,f52,f54,f56,f58,f60,f62,f48,f48,
			,LDBLK(f16),	,,,,,,,STBLK,
			,bcc,pt %icc, vis7)

/*
 * vis7e3 / vis7e1 / vis7e2: exit stages.  SYNC instead of LDBLK(), no extra
 * ST() stores, %dst += 64, %len += 192 - 1*8, then branch to e2 / e3 / e1.
 */
vis7e3:	DO_THE_TRICK(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,
			,f50,f52,f54,f56,f58,f60,f62,f48,f32,
			,SYNC,		,,,,,,,STBLK,
			,add %dst, 64, %dst; add %len, 192 - 1*8, %len; ba,pt %icc, e2)
vis7e1:	DO_THE_TRICK(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,
			,f50,f52,f54,f56,f58,f60,f62,f48,f0,
			,SYNC,		,,,,,,,STBLK,
			,add %dst, 64, %dst; add %len, 192 - 1*8, %len; ba,pt %icc, e3)
vis7e2:	DO_THE_TRICK(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14,
			,f50,f52,f54,f56,f58,f60,f62,f48,f16,
			,SYNC,		,,,,,,,STBLK,
			,add %dst, 64, %dst; add %len, 192 - 1*8, %len; ba,pt %icc, e1)

/*
 * e1 / e2 / e3: common targets of every exit stage above, one per register
 * bank rotation.
 * NOTE(review): END_THE_TRICK1 is not visible here; presumably each
 * expansion ends by transferring control to ett -- confirm in the macro.
 */
e1:	END_THE_TRICK1(	f0,f2,f4,f6,f8,f10,f12,f14,f16,f6)
e2:	END_THE_TRICK1(	f16,f18,f20,f22,f24,f26,f28,f30,f32,f6)
e3:	END_THE_TRICK1(	f32,f34,f36,f38,f40,f42,f44,f46,f0,f6)

/*
 * ett: leave the block-transfer phase and copy what is left 8 bytes at a
 * time.  %x4 = current %asi, %x3 = %gsr.  The new %asi is
 * (%asi ^ ASI_BLK_XOR1) ^ (%asi >> 3) in the kernel build and
 * %asi ^ ASI_BLK_XOR otherwise.
 * NOTE(review): presumably this undoes the block ASI selected in the
 * visNs set-up code -- confirm against the ASI_BLK_XOR* definitions.
 */
ett:	rd		%asi, %x4				/* LSU Group+4bubbles */
	rd		%gsr, %x3				/* LSU Group+4bubbles */
#ifdef __KERNEL__
	srl		%x4, 3, %x5				/* IEU0 Group */
	xor		%x4, ASI_BLK_XOR1, %x4			/* IEU1 */
	wr		%x4, %x5, %asi				/* LSU Group+4bubbles */
#else
	wr		%x4, ASI_BLK_XOR, %asi			/* LSU Group+4bubbles */
#endif
	/* %x3 = low three bits of %gsr; non-zero skips the "nothing left" shortcut. */
	andcc		%x3, 7, %x3				/* IEU1 Group */
	add		%dst, 8, %dst				/* IEU0 */
	bne,pn		%icc, 1f				/* CTI */
	 fzero		%f10					/* FPA */
	/* Annulled delay slot: the pending %f6 is stored only when %len == 0. */
	brz,a,pn	%len, 2f				/* CTI+IEU1 Group */
	 std		%f6, [%dst - 8]				/* Store */
1:	cmp		%len, 8					/* IEU1 */
	blu,pn		%icc, 3f				/* CTI */
	 sub		%src, 64, %src				/* IEU0 Group */
	/*
	 * 8-byte loop, runs while %len >= 8.  %f10 accumulates with fpadd32;
	 * fcmpgt32(old, new) flags the 32-bit lanes that wrapped, and
	 * (mask + 1) >> 1 turns that mask into the number of wrapped lanes
	 * (0,1,2,3 -> 0,1,1,2), which is added to %sum in the delay slot.
	 */
1:	ldda		[%src] %asi, %f2			/* Load Group */
	fpadd32		%f10, %f2, %f12				/* FPA Group+load stall*/
	add		%src, 8, %src				/* IEU0 */
	add		%dst, 8, %dst				/* IEU1 */
	faligndata	%f6, %f2, %f14				/* FPA Group */
	fcmpgt32	%f10, %f12, %x5				/* FPM Group */
	std		%f14, [%dst - 16]			/* Store */
	fmovd		%f2, %f6				/* FPA */
	fmovd		%f12, %f10				/* FPA Group */
	sub		%len, 8, %len				/* IEU1 */
	fzero		%f16					/* FPA Group - FPU nop */
	fzero		%f18					/* FPA Group - FPU nop */
	inc		%x5					/* IEU0 */
	srl		%x5, 1, %x5				/* IEU0 Group (regdep) */
	cmp		%len, 8					/* IEU1 */
	bgeu,pt		%icc, 1b				/* CTI */
	 add		%x5, %sum, %sum				/* IEU0 Group */
	/*
	 * %x3 == 0: store all of %f6 (annulled delay slot) and continue at 2.
	 * Otherwise store only %f7, back %dst up by 4 and give 4 bytes back
	 * to %len.
	 */
3:	brz,a,pt	%x3, 2f					/* CTI+IEU1 */
	 std		%f6, [%dst - 8]				/* Store Group */
	st		%f7, [%dst - 8]				/* Store Group */
	sub		%dst, 4, %dst				/* IEU0 */
	add		%len, 4, %len				/* IEU1 */
2:
#ifdef __KERNEL__
	sub		%sp, 8, %sp				/* IEU0 Group */
#endif
	END_THE_TRICK2(	f48,f50,f52,f54,f56,f58,f60,f10,f12,f62)
	membar		#Sync					/* LSU Group */
#ifdef __KERNEL__
	VISExit
	add		%sp, 8, %sp				/* IEU0 Group */
#endif
	/* Bytes still left go to 26; label 24 sits in this branch's delay slot. */
23:	brnz,pn		%len, 26f				/* CTI+IEU1 Group */
24:	 sllx		%sum, 32, %g1				/* IEU0 */
	/*
	 * Fold the 64-bit %sum to 32 bits: add the low half (shifted up) to
	 * the full value, keep the upper 32 bits of the result, and add 1 when
	 * that addition carried out of bit 63.  The result is left in %src.
	 */
25:	addcc		%sum, %g1, %src				/* IEU1 Group */
	srlx		%src, 32, %src				/* IEU0 Group (regdep) */
	bcs,a,pn	%xcc, 1f				/* CTI */
	 add		%src, 1, %src				/* IEU1 */
#ifndef __KERNEL__
1:	retl							/* CTI Group brk forced*/
	 srl		%src, 0, %src				/* IEU0 */
#else
	/* NOTE(review): the delay slot reloads %g4 from [%g6 + TI_TASK]; presumably the current-task pointer -- confirm. */
1:	retl							/* CTI Group brk forced*/
	 ldx		[%g6 + TI_TASK], %g4			/* Load */
#endif
	/*
	 * 26: copy and sum the last %len bytes, testing bits 8, 4, 2 and 1 of
	 * %len in turn.  Loads go through %asi, stores are plain.
	 * The first branch is not annulled, so the load into %o4 is issued
	 * even when the 8-byte step is skipped.
	 */
26:	andcc		%len, 8, %g0				/* IEU1 Group */
	be,pn		%icc, 1f				/* CTI */
	 lduwa		[%src] %asi, %o4			/* Load */
	lduwa		[%src+4] %asi, %g2			/* Load Group */
	add		%src, 8, %src				/* IEU0 */
	add		%dst, 8, %dst				/* IEU1 */
	sllx		%o4, 32, %g5				/* IEU0 Group */
	stw		%o4, [%dst - 8]				/* Store */
	or		%g5, %g2, %g5				/* IEU0 Group */
	stw		%g2, [%dst - 4]				/* Store */
	/* 64-bit add with end-around carry. */
	addcc		%g5, %sum, %sum				/* IEU1 Group */
	bcs,a,pn	%xcc, 1f				/* CTI */
	 add		%sum, 1, %sum				/* IEU0 */
	/* 4 bytes: word goes into the upper half of %g2 (or %g2 = 0 if skipped). */
1:	andcc		%len, 4, %g0				/* IEU1 Group */
	be,a,pn		%icc, 1f				/* CTI */
	 clr		%g2					/* IEU0 */
	lduwa		[%src] %asi, %g7			/* Load */
	add		%src, 4, %src				/* IEU0 Group */
	add		%dst, 4, %dst				/* IEU1 */
	sllx		%g7, 32, %g2				/* IEU0 Group */
	stw		%g7, [%dst - 4]				/* Store */
	/* 2 bytes: halfword << 16 into %g3 (or %g3 = 0 if skipped). */
1:	andcc		%len, 2, %g0				/* IEU1 */
	be,a,pn		%icc, 1f				/* CTI */
	 clr		%g3					/* IEU0 Group */
	lduha		[%src] %asi, %g7			/* Load */
	add		%src, 2, %src				/* IEU1 */
	add		%dst, 2, %dst				/* IEU0 Group */
	sll		%g7, 16, %g3				/* IEU0 Group */
	sth		%g7, [%dst - 2]				/* Store */
	/* 1 byte: byte << 8 into %o5 (or %o5 = 0 if skipped). */
1:	andcc		%len, 1, %g0				/* IEU1 */
	be,a,pn		%icc, 1f				/* CTI */
	 clr		%o5					/* IEU0 Group */
	lduba		[%src] %asi, %g7			/* Load */
	sll		%g7, 8, %o5				/* IEU0 Group */
	stb		%g7, [%dst]				/* Store */
	/* Merge the three partial values and add them with end-around carry. */
1:	or		%g2, %g3, %g3				/* IEU1 */
	or		%o5, %g3, %g3				/* IEU0 Group (regdep) */
	addcc		%g3, %sum, %sum				/* IEU1 Group (regdep) */
	bcs,a,pn	%xcc, 1f				/* CTI */
	 add		%sum, 1, %sum				/* IEU0 */
	/* Rejoin the fold at 25; the delay slot repeats the sllx from label 24. */
1:	ba,pt		%xcc, 25b				/* CTI Group */
	 sllx		%sum, 32, %g1				/* IEU0 */
#ifdef __KERNEL__
	/*
	 * Emits four words into __ex_table: csum_partial_copy_vis, 0, end,
	 * cpc_handler.
	 * NOTE(review): presumably a range entry that routes faults taken
	 * between csum_partial_copy_vis and end to cpc_handler -- confirm
	 * against the exception-table format used by this kernel version.
	 */
end:
	.section	__ex_table
	.align		4
	.word		csum_partial_copy_vis, 0, end, cpc_handler
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -