📄 viscsumcopy.s
字号:
/*
 * NOTE(review): interior slice of a larger routine. Its entry point, the
 * DO_THE_TRICK / LDBLK / STBLK / ST / SYNC macros, the %src, %dst, %len,
 * %sum and %x1..%x8 register aliases, and the e1/e2/e3 branch targets are
 * defined outside this listing; they are described here only by how they
 * are used. Line breaks were lost in this copy, so labels and cpp
 * directives are fused to neighbouring instructions ("#else4:",
 * "2048vis0s:"); restore one statement per line before assembling.
 *
 * Head (up to labels 3/4): skipped entirely when %o4 is zero. Otherwise it
 * copies an 8-byte piece if (%dst & 8) != 0, a 16-byte piece if
 * (%g1 & 0x10) != 0 and a 32-byte piece if (%g1 & 0x20) != 0, each with
 * ldda + faligndata + std, advancing %src and %dst and shrinking %len by
 * the same amount.
 *
 * Checksum bookkeeping in the head: each loaded 8-byte word is added with
 * fpadd32 into an accumulator that alternates between %f48 and %f50.
 * fcmpgt32 then compares the old accumulator with the new one and puts the
 * result in an integer register, which is later run through "inc; srl 1"
 * and added to %sum. Presumably the fcmpgt32 result is a 2-bit per-lane
 * mask, in which case (mask + 1) >> 1 is the number of flagged lanes --
 * confirm against the VIS instruction set manual.
 *
 * Dispatch (label 4): %g3 = (%src & 0x38) << 8 is added to the address of
 * vis0s and jumped to, i.e. the target is vis0s + ((%src >> 3) & 7) * 2048.
 * Each visNs entry is preceded by ".align 2048"; this assumes every variant
 * fits in 2048 bytes -- TODO confirm. Only vis0s..vis3s are visible here.
 * %len is reduced by 0xc0 before the jump and fzero %f32 sits in the jmpl
 * delay slot.
 *
 * Each visNs variant writes %asi from %g2 and ASI_BLK_XOR, rebases %src by
 * 128 - 8*N, loads through [%src-128] and [%src-64], then runs a loop
 * (visN) of three DO_THE_TRICK invocations that rotate the f0/f16/f32
 * register groups. The first two leave via bcs to visNe1/visNe2, the third
 * loops back via bcc. The visNe1/e2/e3 tails pass SYNC where the loop
 * passes LDBLK, add explicit ST(...) stores, adjust %dst and %len, and
 * branch to e1/e2/e3.
 */
brz,pn %o4, 3f /* CTI+IEU1 Group */ sub %g1, %o4, %g1 /* IEU0 */ ldda [%src] %asi, %f0 /* Load */ clr %o4 /* IEU0 Group */ andcc %dst, 8, %g0 /* IEU1 */ be,pn %icc, 1f /* CTI */ ldda [%src + 8] %asi, %f2 /* Load Group */ add %src, 8, %src /* IEU0 */ sub %len, 8, %len /* IEU1 */ fpadd32 %f0, %f48, %f50 /* FPA */ addcc %dst, 8, %dst /* IEU1 Group */ faligndata %f0, %f2, %f16 /* FPA */ fcmpgt32 %f48, %f50, %o4 /* FPM Group */ fmovd %f2, %f0 /* FPA Group */ ldda [%src + 8] %asi, %f2 /* Load */ std %f16, [%dst - 8] /* Store */ fmovd %f50, %f48 /* FPA */1: andcc %g1, 0x10, %g0 /* IEU1 Group */ be,pn %icc, 1f /* CTI */ and %g1, 0x20, %g1 /* IEU0 */ fpadd32 %f0, %f48, %f50 /* FPA */ ldda [%src + 16] %asi, %f4 /* Load Group */ add %src, 16, %src /* IEU0 */ add %dst, 16, %dst /* IEU1 */ faligndata %f0, %f2, %f16 /* FPA */ fcmpgt32 %f48, %f50, %g5 /* FPM Group */ sub %len, 16, %len /* IEU0 */ inc %o4 /* IEU1 */ std %f16, [%dst - 16] /* Store Group */ fpadd32 %f2, %f50, %f48 /* FPA */ srl %o4, 1, %o5 /* IEU0 */ faligndata %f2, %f4, %f18 /* FPA Group */ std %f18, [%dst - 8] /* Store */ fcmpgt32 %f50, %f48, %o4 /* FPM Group */ add %o5, %sum, %sum /* IEU0 */ ldda [%src + 8] %asi, %f2 /* Load */ fmovd %f4, %f0 /* FPA */1: brz,a,pn %g1, 4f /* CTI+IEU1 Group */ rd %asi, %g2 /* LSU Group + 4 bubbles*/ inc %g5 /* IEU0 */ fpadd32 %f0, %f48, %f50 /* FPA */ ldda [%src + 16] %asi, %f4 /* Load Group */ srl %g5, 1, %g5 /* IEU0 */ add %dst, 32, %dst /* IEU1 */ faligndata %f0, %f2, %f16 /* FPA */ fcmpgt32 %f48, %f50, %o5 /* FPM Group */ inc %o4 /* IEU0 */ ldda [%src + 24] %asi, %f6 /* Load */ srl %o4, 1, %o4 /* IEU0 Group */ add %g5, %sum, %sum /* IEU1 */ ldda [%src + 32] %asi, %f8 /* Load */ fpadd32 %f2, %f50, %f48 /* FPA */ faligndata %f2, %f4, %f18 /* FPA Group */ sub %len, 32, %len /* IEU0 */ std %f16, [%dst - 32] /* Store */ fcmpgt32 %f50, %f48, %g3 /* FPM Group */ inc %o5 /* IEU0 */ add %o4, %sum, %sum /* IEU1 */ fpadd32 %f4, %f48, %f50 /* FPA */ faligndata %f4, %f6, %f20 /* FPA Group 
*/ srl %o5, 1, %o5 /* IEU0 */ fcmpgt32 %f48, %f50, %g5 /* FPM Group */ add %o5, %sum, %sum /* IEU0 */ std %f18, [%dst - 24] /* Store */ fpadd32 %f6, %f50, %f48 /* FPA */ inc %g3 /* IEU0 Group */ std %f20, [%dst - 16] /* Store */ add %src, 32, %src /* IEU1 */ faligndata %f6, %f8, %f22 /* FPA */ fcmpgt32 %f50, %f48, %o4 /* FPM Group */ srl %g3, 1, %g3 /* IEU0 */ std %f22, [%dst - 8] /* Store */ add %g3, %sum, %sum /* IEU0 Group */3: rd %asi, %g2 /* LSU Group + 4 bubbles*/#ifdef __KERNEL__4: sethi %hi(vis0s), %g7 /* IEU0 Group */ or %g2, ASI_BLK_OR, %g2 /* IEU1 */#else4: rd %pc, %g7 /* LSU Group + 4 bubbles*/#endif inc %g5 /* IEU0 Group */ and %src, 0x38, %g3 /* IEU1 */ membar #StoreLoad /* LSU Group */ srl %g5, 1, %g5 /* IEU0 */ inc %o4 /* IEU1 */ sll %g3, 8, %g3 /* IEU0 Group */ sub %len, 0xc0, %len /* IEU1 */ addcc %g5, %sum, %sum /* IEU1 Group */ srl %o4, 1, %o4 /* IEU0 */ add %g7, %g3, %g7 /* IEU0 Group */ add %o4, %sum, %sum /* IEU1 */#ifdef __KERNEL__ jmpl %g7 + %lo(vis0s), %g0 /* CTI+IEU1 Group */#else jmpl %g7 + (vis0s - 4b), %g0 /* CTI+IEU1 Group */#endif fzero %f32 /* FPA */ /* vis0s: variant that advances %src by 128 */.align 2048vis0s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ add %src, 128, %src /* IEU0 Group */ ldda [%src-128] %asi, %f0 /* Load Group */ ldda [%src-64] %asi, %f16 /* Load Group */ fmovd %f48, %f62 /* FPA Group f0 available*/ faligndata %f0, %f2, %f48 /* FPA Group f2 available*/ fcmpgt32 %f32, %f2, %x1 /* FPM Group f4 available*/ fpadd32 %f0, %f62, %f0 /* FPA */ fcmpgt32 %f32, %f4, %x2 /* FPM Group f6 available*/ faligndata %f2, %f4, %f50 /* FPA */ fcmpgt32 %f62, %f0, %x3 /* FPM Group f8 available*/ faligndata %f4, %f6, %f52 /* FPA */ fcmpgt32 %f32, %f6, %x4 /* FPM Group f10 available*/ inc %x1 /* IEU0 */ faligndata %f6, %f8, %f54 /* FPA */ fcmpgt32 %f32, %f8, %x5 /* FPM Group f12 available*/ srl %x1, 1, %x1 /* IEU0 */ inc %x2 /* IEU1 */ faligndata %f8, %f10, %f56 /* FPA */ fcmpgt32 %f32, %f10, %x6 /* FPM Group f14 available*/ srl %x2, 1, %x2 /* IEU0 */ add %sum, %x1, %sum /* IEU1 */ 
faligndata %f10, %f12, %f58 /* FPA */ fcmpgt32 %f32, %f12, %x7 /* FPM Group */ inc %x3 /* IEU0 */ add %sum, %x2, %sum /* IEU1 */ faligndata %f12, %f14, %f60 /* FPA */ fcmpgt32 %f32, %f14, %x8 /* FPM Group */ srl %x3, 1, %x3 /* IEU0 */ inc %x4 /* IEU1 */ fmovd %f14, %f62 /* FPA */ srl %x4, 1, %x4 /* IEU0 Group */ add %sum, %x3, %sum /* IEU1 */vis0: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f48,f50,f52,f54,f56,f58,f60,f62,f62, ,LDBLK(f32), STBLK,,,,,,,, ,bcs,pn %icc, vis0e1) DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f48,f50,f52,f54,f56,f58,f60,f62,f62, ,LDBLK(f0), STBLK,,,,,,,, ,bcs,pn %icc, vis0e2) DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f48,f50,f52,f54,f56,f58,f60,f62,f62, ,LDBLK(f16), STBLK,,,,,,,, ,bcc,pt %icc, vis0)vis0e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f48,f50,f52,f54,f56,f58,f60,f62,f32, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e2)vis0e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f48,f50,f52,f54,f56,f58,f60,f62,f0, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e3)vis0e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f48,f50,f52,f54,f56,f58,f60,f62,f16, ,SYNC, STBLK,ST(f48,64),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40),ST(f60,48), ,add %dst, 56, %dst; add %len, 192 - 8*8, %len; ba,pt %icc, e1) /* vis1s: variant that advances %src by 128 - 8 */.align 2048vis1s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ add %src, 128 - 8, %src /* IEU0 Group */ ldda [%src-128] %asi, %f0 /* Load Group */ ldda [%src-64] %asi, %f16 /* Load Group */ fmovd %f0, %f58 /* FPA Group */ fmovd %f48, %f0 /* FPA Group */ fcmpgt32 %f32, %f2, %x2 /* FPM Group */ faligndata %f2, %f4, %f48 /* FPA */ fcmpgt32 %f32, 
%f4, %x3 /* FPM Group */ faligndata %f4, %f6, %f50 /* FPA */ fcmpgt32 %f32, %f6, %x4 /* FPM Group */ faligndata %f6, %f8, %f52 /* FPA */ fcmpgt32 %f32, %f8, %x5 /* FPM Group */ inc %x2 /* IEU1 */ faligndata %f8, %f10, %f54 /* FPA */ fcmpgt32 %f32, %f10, %x6 /* FPM Group */ srl %x2, 1, %x2 /* IEU0 */ faligndata %f10, %f12, %f56 /* FPA */ fcmpgt32 %f32, %f12, %x7 /* FPM Group */ inc %x3 /* IEU0 */ add %sum, %x2, %sum /* IEU1 */ faligndata %f12, %f14, %f58 /* FPA */ fcmpgt32 %f32, %f14, %x8 /* FPM Group */ srl %x3, 1, %x3 /* IEU0 */ inc %x4 /* IEU1 */ fmovd %f14, %f60 /* FPA */ srl %x4, 1, %x4 /* IEU0 Group */ add %sum, %x3, %sum /* IEU1 */vis1: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f62,f48,f50,f52,f54,f56,f58,f60,f60, ,LDBLK(f32), ,STBLK,,,,,,, ,bcs,pn %icc, vis1e1) DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f62,f48,f50,f52,f54,f56,f58,f60,f60, ,LDBLK(f0), ,STBLK,,,,,,, ,bcs,pn %icc, vis1e2) DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f62,f48,f50,f52,f54,f56,f58,f60,f60, ,LDBLK(f16), ,STBLK,,,,,,, ,bcc,pt %icc, vis1)vis1e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f62,f48,f50,f52,f54,f56,f58,f60,f32, ,SYNC, ,STBLK,ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40), ,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e2)vis1e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f62,f48,f50,f52,f54,f56,f58,f60,f0, ,SYNC, ,STBLK,ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40), ,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e3)vis1e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f62,f48,f50,f52,f54,f56,f58,f60,f16, ,SYNC, ,STBLK,ST(f48,0),ST(f50,8),ST(f52,16),ST(f54,24),ST(f56,32),ST(f58,40), ,add %dst, 48, %dst; add %len, 192 - 7*8, %len; ba,pt %icc, e1) /* vis2s: variant that advances %src by 128 - 16 and moves %dst back by 64 */.align 2048vis2s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ add %src, 128 
- 16, %src /* IEU0 Group */ ldda [%src-128] %asi, %f0 /* Load Group */ ldda [%src-64] %asi, %f16 /* Load Group */ fmovd %f0, %f56 /* FPA Group */ fmovd %f48, %f0 /* FPA Group */ sub %dst, 64, %dst /* IEU0 */ fpsub32 %f2, %f2, %f2 /* FPA Group */ fcmpgt32 %f32, %f4, %x3 /* FPM Group */ faligndata %f4, %f6, %f48 /* FPA */ fcmpgt32 %f32, %f6, %x4 /* FPM Group */ faligndata %f6, %f8, %f50 /* FPA */ fcmpgt32 %f32, %f8, %x5 /* FPM Group */ faligndata %f8, %f10, %f52 /* FPA */ fcmpgt32 %f32, %f10, %x6 /* FPM Group */ faligndata %f10, %f12, %f54 /* FPA */ fcmpgt32 %f32, %f12, %x7 /* FPM Group */ inc %x3 /* IEU0 */ faligndata %f12, %f14, %f56 /* FPA */ fcmpgt32 %f32, %f14, %x8 /* FPM Group */ srl %x3, 1, %x3 /* IEU0 */ inc %x4 /* IEU1 */ fmovd %f14, %f58 /* FPA */ srl %x4, 1, %x4 /* IEU0 Group */ add %sum, %x3, %sum /* IEU1 */vis2: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f60,f62,f48,f50,f52,f54,f56,f58,f58, ,LDBLK(f32), ,,STBLK,,,,,, ,bcs,pn %icc, vis2e1) DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f60,f62,f48,f50,f52,f54,f56,f58,f58, ,LDBLK(f0), ,,STBLK,,,,,, ,bcs,pn %icc, vis2e2) DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f60,f62,f48,f50,f52,f54,f56,f58,f58, ,LDBLK(f16), ,,STBLK,,,,,, ,bcc,pt %icc, vis2)vis2e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f60,f62,f48,f50,f52,f54,f56,f58,f32, ,SYNC, ,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96), ,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e2)vis2e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f60,f62,f48,f50,f52,f54,f56,f58,f0, ,SYNC, ,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96), ,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e3)vis2e2: DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f60,f62,f48,f50,f52,f54,f56,f58,f16, ,SYNC, 
,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),ST(f56,96), ,add %dst, 104, %dst; add %len, 192 - 6*8, %len; ba,pt %icc, e1) /* vis3s: variant that advances %src by 128 - 24 and moves %dst back by 64 */.align 2048vis3s: wr %g2, ASI_BLK_XOR, %asi /* LSU Group */ add %src, 128 - 24, %src /* IEU0 Group */ ldda [%src-128] %asi, %f0 /* Load Group */ ldda [%src-64] %asi, %f16 /* Load Group */ fmovd %f0, %f54 /* FPA Group */ fmovd %f48, %f0 /* FPA Group */ sub %dst, 64, %dst /* IEU0 */ fpsub32 %f2, %f2, %f2 /* FPA Group */ fpsub32 %f4, %f4, %f4 /* FPA Group */ fcmpgt32 %f32, %f6, %x4 /* FPM Group */ faligndata %f6, %f8, %f48 /* FPA */ fcmpgt32 %f32, %f8, %x5 /* FPM Group */ faligndata %f8, %f10, %f50 /* FPA */ fcmpgt32 %f32, %f10, %x6 /* FPM Group */ faligndata %f10, %f12, %f52 /* FPA */ fcmpgt32 %f32, %f12, %x7 /* FPM Group */ faligndata %f12, %f14, %f54 /* FPA */ fcmpgt32 %f32, %f14, %x8 /* FPM Group */ fmovd %f14, %f56 /* FPA */ inc %x4 /* IEU0 */ srl %x4, 1, %x4 /* IEU0 Group */vis3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f58,f60,f62,f48,f50,f52,f54,f56,f56, ,LDBLK(f32), ,,,STBLK,,,,, ,bcs,pn %icc, vis3e1) DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f58,f60,f62,f48,f50,f52,f54,f56,f56, ,LDBLK(f0), ,,,STBLK,,,,, ,bcs,pn %icc, vis3e2) DO_THE_TRICK( f32,f34,f36,f38,f40,f42,f44,f46,f0,f2,f4,f6,f8,f10,f12,f14, ,f58,f60,f62,f48,f50,f52,f54,f56,f56, ,LDBLK(f16), ,,,STBLK,,,,, ,bcc,pt %icc, vis3)vis3e3: DO_THE_TRICK( f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30, ,f58,f60,f62,f48,f50,f52,f54,f56,f32, ,SYNC, ,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88), ,add %dst, 96, %dst; add %len, 192 - 5*8, %len; ba,pt %icc, e2)vis3e1: DO_THE_TRICK( f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46, ,f58,f60,f62,f48,f50,f52,f54,f56,f0, ,SYNC, ,,,STBLK,ST(f48,64),ST(f50,72),ST(f52,80),ST(f54,88),
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -