📄 trsm_kernel_ln.s
字号:
} ;; { .mfi (p3) LDFPD f58, f59 = [BOFFSET], 2 * SIZE FMA f65 = f33, f48, f65 // A2 * B1 (p5) adds C13 = 2 * SIZE, C5 } { .mfi nop __LINE__ FMA f73 = f33, f49, f73 // A2 * B2 (p5) adds C14 = 2 * SIZE, C6 } ;; { .mfi (p3) LDFPD f60, f61 = [BOFFSET], 2 * SIZE FMA f81 = f33, f50, f81 // A2 * B3 (p5) adds C15 = 2 * SIZE, C7 } { .mfi nop __LINE__ FMA f89 = f33, f51, f89 // A2 * B4 (p5) adds C16 = 2 * SIZE, C8 } ;; { .mfb (p3) LDFPD f62, f63 = [BOFFSET], 2 * SIZE FMA f97 = f33, f52, f97 // A2 * B5 nop __LINE__ } { .mfb nop __LINE__ FMA f105 = f33, f53, f105 // A2 * B6 nop __LINE__ } ;; { .mfb (p3) LDFPD f42, f43 = [AOFFSET], 2 * SIZE FMA f113 = f33, f54, f113 // A2 * B7 nop __LINE__ } { .mfb nop __LINE__ FMA f121 = f33, f55, f121 // A2 * B8 nop __LINE__ } ;; { .mfb nop __LINE__ FMA f66 = f34, f48, f66 // A3 * B1 nop __LINE__ } { .mfb nop __LINE__ FMA f74 = f34, f49, f74 // A3 * B2 nop __LINE__ } ;; { .mfb nop __LINE__ FMA f82 = f34, f50, f82 // A3 * B3 nop __LINE__ } { .mfb nop __LINE__ FMA f90 = f34, f51, f90 // A3 * B4 nop __LINE__ } ;; { .mfb nop __LINE__ FMA f98 = f34, f52, f98 // A3 * B5 nop __LINE__ } { .mfb nop __LINE__ FMA f106 = f34, f53, f106 // A3 * B6 nop __LINE__ } { .mfb nop __LINE__ FMA f114 = f34, f54, f114 // A3 * B7 nop __LINE__ } { .mfb nop __LINE__ FMA f122 = f34, f55, f122 // A3 * B8 nop __LINE__ } { .mfb nop __LINE__ FMA f67 = f35, f48, f67 // A4 * B1 nop __LINE__ } { .mfb nop __LINE__ FMA f75 = f35, f49, f75 // A4 * B2 nop __LINE__ } { .mfb nop __LINE__ FMA f83 = f35, f50, f83 // A4 * B3 nop __LINE__ } { .mfb nop __LINE__ FMA f91 = f35, f51, f91 // A4 * B4 nop __LINE__ } { .mfb (p4) LDFPD f32, f33 = [AOFFSET], 2 * SIZE FMA f99 = f35, f52, f99 // A4 * B5 nop __LINE__ } { .mfb nop __LINE__ FMA f107 = f35, f53, f107 // A4 * B6 nop __LINE__ } { .mfb (p4) LDFPD f48, f49 = [BOFFSET], 2 * SIZE FMA f115 = f35, f54, f115 // A4 * B7 nop __LINE__ } { .mfb nop __LINE__ FMA f123 = f35, f55, f123 // A4 * B8 nop __LINE__ } ;; { .mfb (p4) LDFPD f50, f51 = [BOFFSET], 2 * SIZE (p3) FMA f64 = f40, f56, f64 // A1 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f72 = f40, f57, f72 // A1 * B2 nop __LINE__ } ;; { .mfb (p4) LDFPD f52, f53 = [BOFFSET], 2 * SIZE (p3) FMA f80 = f40, f58, f80 // A1 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f88 = f40, f59, f88 // A1 * B4 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f96 = f40, f60, f96 // A1 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f104 = f40, f61, f104 // A1 * B6 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f112 = f40, f62, f112 // A1 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f120 = f40, f63, f120 // A1 * B8 nop __LINE__ } ;; { .mfb (p4) LDFPD f54, f55 = [BOFFSET], 2 * SIZE (p3) FMA f65 = f41, f56, f65 // A2 * B1 nop __LINE__ } { .mfb (p3) FMA f73 = f41, f57, f73 // A2 * B2 nop __LINE__ } { .mfb (p4) LDFPD f34, f35 = [AOFFSET], 2 * SIZE (p3) FMA f81 = f41, f58, f81 // A2 * B3 nop __LINE__ } { .mfb (p3) FMA f89 = f41, f59, f89 // A2 * B4 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f97 = f41, f60, f97 // A2 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f105 = f41, f61, f105 // A2 * B6 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f113 = f41, f62, f113 // A2 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f121 = f41, f63, f121 // A2 * B8 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f66 = f42, f56, f66 // A3 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f74 = f42, f57, f74 // A3 * B2 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f82 = f42, f58, f82 // A3 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f90 = f42, f59, f90 // A3 * B4 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f98 = f42, f60, f98 // A3 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f106 = f42, f61, f106 // A3 * B6 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f114 = f42, f62, f114 // A3 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f122 = f42, f63, f122 // A3 * B8 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f67 = f43, f56, f67 // A4 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f75 = f43, f57, f75 // A4 * B2 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f83 = f43, f58, f83 // A4 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f91 = f43, f59, f91 // A4 * B4 nop __LINE__ } ;; { .mfb nop __LINE__ (p3) FMA f99 = f43, f60, f99 // A4 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA f107 = f43, f61, f107 // A4 * B6 nop __LINE__ } ;; { .mfi nop __LINE__ (p3) FMA f115 = f43, f62, f115 // A4 * B7 adds L = -1, L } { .mfb nop __LINE__ (p3) FMA f123 = f43, f63, f123 // A4 * B8 br.cloop.sptk.few .L022 } ;; .L028: #if defined(LN) || defined(RT) #ifdef LN adds r2 = -4, KK #else adds r2 = -8, KK #endif ;; shladd r2 = r2, BASE_SHIFT, r0 ;; shladd AOFFSET = r2, 2, AORIG shladd BOFFSET = r2, 3, B ;; #endif adds AOFFSET2 = 4 * SIZE, AOFFSET adds BOFFSET2 = 4 * SIZE, BOFFSET ;; #if defined(LN) || defined(LT) LDFPD f32, f33 = [BOFFSET], 2 * SIZE ;; LDFPD f34, f35 = [BOFFSET], 2 * SIZE ;; LDFPD f36, f37 = [BOFFSET], 2 * SIZE ;; LDFPD f38, f39 = [BOFFSET], 2 * SIZE ;; LDFPD f40, f41 = [BOFFSET], 2 * SIZE ;; LDFPD f42, f43 = [BOFFSET], 2 * SIZE ;; LDFPD f44, f45 = [BOFFSET], 2 * SIZE ;; LDFPD f46, f47 = [BOFFSET], 2 * SIZE ;; { .mfi LDFPD f48, f49 = [BOFFSET], 2 * SIZE FSUB f64 = f32, f64 nop __LINE__ } { .mfi nop __LINE__ FSUB f72 = f33, f72 nop __LINE__ } ;; { .mfi LDFPD f50, f51 = [BOFFSET], 2 * SIZE FSUB f80 = f34, f80 nop __LINE__ } { .mfi nop __LINE__ FSUB f88 = f35, f88 nop __LINE__ } ;; { .mfi LDFPD f52, f53 = [BOFFSET], 2 * SIZE FSUB f96 = f36, f96 nop __LINE__ } { .mfi nop __LINE__ FSUB f104 = f37, f104 nop __LINE__ } ;; { .mfi LDFPD f54, f55 = [BOFFSET], 2 * SIZE FSUB f112 = f38, f112 nop __LINE__ } { .mfi nop __LINE__ FSUB f120 = f39, f120 nop __LINE__ } ;; { .mfi LDFPD f56, f57 = [BOFFSET], 2 * SIZE FSUB f65 = f40, f65 nop __LINE__ } { .mfi nop __LINE__ FSUB f73 = f41, f73 nop __LINE__ } ;; { .mfi LDFPD f58, f59 = [BOFFSET], 2 * SIZE FSUB f81 = f42, f81 nop __LINE__ } { .mfi nop __LINE__ FSUB f89 = f43, f89 nop __LINE__ } ;; { .mfi LDFPD f60, f61 = [BOFFSET], 2 * SIZE FSUB f97 = f44, f97 nop __LINE__ } { .mfi nop __LINE__ FSUB f105 = f45, f105 nop __LINE__ } ;; { .mfi LDFPD f62, f63 = [BOFFSET] FSUB f113 = f46, f113 adds BOFFSET = -30 * SIZE, BOFFSET } { .mfi nop __LINE__ FSUB f121 = f47, f121 nop __LINE__ } ;; FSUB f66 = f48, f66 FSUB f74 = f49, f74 FSUB f82 = f50, f82 FSUB f90 = f51, f90 FSUB f98 = f52, f98 FSUB f106 = f53, f106 FSUB f114 = f54, f114 FSUB f122 = f55, f122 ;; FSUB f67 = f56, f67 FSUB f75 = f57, f75 FSUB f83 = f58, f83 FSUB f91 = f59, f91 FSUB f99 = f60, f99 FSUB f107 = f61, f107 FSUB f115 = f62, f115 FSUB f123 = f63, f123 ;; #else LDFPD f32, f33 = [AOFFSET], 2 * SIZE ;; LDFPD f34, f35 = [AOFFSET], 2 * SIZE ;; LDFPD f36, f37 = [AOFFSET], 2 * SIZE ;; LDFPD f38, f39 = [AOFFSET], 2 * SIZE ;; LDFPD f40, f41 = [AOFFSET], 2 * SIZE ;; LDFPD f42, f43 = [AOFFSET], 2 * SIZE ;; LDFPD f44, f45 = [AOFFSET], 2 * SIZE ;; LDFPD f46, f47 = [AOFFSET], 2 * SIZE ;; LDFPD f48, f49 = [AOFFSET], 2 * SIZE ;; LDFPD f50, f51 = [AOFFSET], 2 * SIZE ;; LDFPD f52, f53 = [AOFFSET], 2 * SIZE ;; LDFPD f54, f55 = [AOFFSET], 2 * SIZE ;; LDFPD f56, f57 = [AOFFSET], 2 * SIZE ;; LDFPD f58, f59 = [AOFFSET], 2 * SIZE ;; LDFPD f60, f61 = [AOFFSET], 2 * SIZE ;; LDFPD f62, f63 = [AOFFSET] adds AOFFSET = -30 * SIZE, AOFFSET ;; FSUB f64 = f32, f64 FSUB f65 = f33, f65 FSUB f66 = f34, f66 FSUB f67 = f35, f67 FSUB f72 = f36, f72 FSUB f73 = f37, f73 FSUB f74 = f38, f74 FSUB f75 = f39, f75 FSUB f80 = f40, f80 FSUB f81 = f41, f81 FSUB f82 = f42, f82 FSUB f83 = f43, f83 FSUB f88 = f44, f88 FSUB f89 = f45, f89 FSUB f90 = f46, f90 FSUB f91 = f47, f91 ;; FSUB f96 = f48, f96 FSUB f97 = f49, f97 FSUB f98 = f50, f98 FSUB f99 = f51, f99 ;; FSUB f104 = f52, f104 FSUB f105 = f53, f105 FSUB f106 = f54, f106 FSUB f107 = f55, f107 ;; FSUB f112 = f56, f112 FSUB f113 = f57, f113 FSUB f114 = f58, f114 FSUB f115 = f59, f115 ;; FSUB f120 = f60, f120 FSUB f121 = f61, f121 FSUB f122 = f62, f122 FSUB f123 = f63, f123 ;; #endif #ifdef LN adds AOFFSET = 14 * SIZE, AOFFSET ;; LDFPD f33, f32 = [AOFFSET] adds AOFFSET = - 2 * SIZE, AOFFSET ;; LDFPD f35, f34 = [AOFFSET] adds AOFFSET = - 2 * SIZE, AOFFSET ;; LDFD f36 = [AOFFSET], - 2 * SIZE ;; LDFPD f38, f37 = [AOFFSET] adds AOFFSET = - 4 * SIZE, AOFFSET ;; LDFPD f40, f39 = [AOFFSET] adds AOFFSET = - 4 * SIZE, AOFFSET ;; LDFD f41 = [AOFFSET] ;; FMPY f67 = f67, f32 FMPY f99 = f99, f32 FMPY f75 = f75, f32 FMPY f107 = f107, f32 FMPY f83 = f83, f32 FMPY f115 = f115, f32 FMPY f91 = f91, f32 FMPY f123 = f123, f32 ;; FNMA f66 = f67, f33, f66 FNMA f98 = f99, f33, f98 FNMA f74 = f75, f33, f74 FNMA f106 = f107, f33, f106 FNMA f82 = f83, f33, f82 FNMA f114 = f115, f33, f114 FNMA f90 = f91, f33, f90 FNMA f122 = f123, f33, f122 ;; FNMA f65 = f67, f34, f65 FNMA f97 = f99, f34, f97 FNMA f73 = f75, f34, f73 FNMA f105 = f107, f34, f105 FNMA f81 = f83, f34, f81 FNMA f113 = f115, f34, f113 FNMA f89 = f91, f34, f89 FNMA f121 = f123, f34, f121 ;; FNMA f64 = f67, f35, f64 FNMA f96 = f99, f35, f96 FNMA f72 = f75, f35, f72 FNMA f104 = f107, f35, f104 FNMA f80 = f83, f35, f80 FNMA f112 = f115, f35, f112 FNMA f88 = f91, f35, f88 FNMA f120 = f123, f35, f120 ;; FMPY f66 = f66, f36 FMPY f98 = f98, f36 FMPY f74 = f74, f36 FMPY f106 = f106, f36 FMPY f82 = f82, f36 FMPY f114 = f114, f36 FMPY f90 = f90, f36 FMPY f122 = f122, f36 ;; FNMA f65 = f66, f37, f65 FNMA f97 = f98, f37, f97 FNMA f73 = f74, f37, f73 FNMA f105 = f106, f37, f105 FNMA f81 = f82, f37, f81 FNMA f113 = f114, f37, f113 FNMA f89 = f90, f37, f89 FNMA f121 = f122, f37, f121 ;; FNMA f64 = f66, f38, f64 FNMA f96 = f98, f38, f96 FNMA f72 = f74, f38, f72 FNMA f104 = f106, f38, f104 FNMA f80 = f82, f38, f80 FNMA f112 = f114, f38, f112 FNMA f88 = f90, f38, f88 FNMA f120 = f122, f38, f120 ;; adds BOFFSET = 24 * SIZE, BOFFSET adds BOFFSET2 = 24 * SIZE, BOFFSET2 ;; { .mfi STFD [BOFFSET] = f67, SIZE FMPY f65 = f65, f39 } { .mfi STFD [BOFFSET2] = f99, SIZE FMPY f97 = f97, f39 } ;; { .mfi STFD [BOFFSET] = f75, SIZE FMPY f73 = f73, f39 } { .mfi STFD [BOFFSET2] = f107, SIZE FMPY f105 = f105, f39 } ;; { .mfi STFD [BOFFSET] = f83, SIZE FMPY f81 = f81, f39 } { .mfi STFD [BOFFSET2] = f115, SIZE FMPY f113 = f113, f39 } ;; { .mfi STFD [BOFFSET] = f91, - 11 * SIZE FMPY f89 = f89, f39 } { .mfi STFD [BOFFSET2] = f123, - 11 * SIZE FMPY f121 = f121, f39 } ;; { .mfi STFD [BOFFSET] = f66, SIZE FNMA f64 = f65, f40, f64 } { .mfi STFD [BOFFSET2] = f98, SIZE FNMA f96 = f97, f40, f96 } ;; { .mfi STFD [BOFFSET] = f74, SIZE FNMA f72 = f73, f40, f72 } { .mfi STFD [BOFFSET2] = f106, SIZE FNMA f104 = f105, f40, f104 } ;; { .mfi STFD [BOFFSET] = f82, SIZE FNMA f80 = f81, f40, f80 } { .mfi STFD [BOFFSET2] = f114, SIZE FNMA f112 = f113, f40, f112 } ;; { .mfi STFD [BOFFSET] = f90, -11 * SIZE FNMA f88 = f89, f40, f88 } { .mfi STFD [BOFFSET2] = f122, -11 * SIZE FNMA f120 = f121, f40, f120 } ;; { .mfi STFD [BOFFSET] = f65, SIZE FMPY f64 = f64, f41 } { .mfi STFD [BOFFSET2] = f97, SIZE FMPY f96 = f96, f41 } ;; { .mfi STFD [BOFFSET] = f73, SIZE FMPY f72 = f72, f41 } { .mfi STFD [BOFFSET2] = f105, SIZE FMPY f104 = f104, f41 } ;; { .mfi STFD [BOFFSET] = f81, SIZE FMPY f80 = f80, f41 } { .mfi STFD [BOFFSET2] = f113, SIZE FMPY f112 = f112, f41 } ;; { .mfi STFD [BOFFSET] = f89, - 11 * SIZE FMPY f88 = f88, f41 } { .mfi STFD [BOFFSET2] = f121, - 11 * SIZE FMPY f120 = f120, f41 } ;; { .mmi STFD [BOFFSET] = f64, SIZE STFD [BOFFSET2] = f96, SIZE adds C1 = -4 * SIZE, C1 } ;; { .mmi STFD [BOFFSET] = f72, SIZE STFD [BOFFSET2] = f104, SIZE adds C2 = -4 * SIZE, C2 } ;; { .mmi STFD [BOFFSET] = f80, SIZE STFD [BOFFSET2] = f112, SIZE nop __LINE__ } ;; { .mmi STFD [BOFFSET] = f88, - 3 * SIZE STFD [BOFFSET2] = f120, - 3 * SIZE } ;; #endif #ifdef LT LDFPD f32, f33 = [AOFFSET], 2 * SIZE ;; LDFPD f34, f35 = [AOFFSET] adds AOFFSET = 3 * SIZE, AOFFSET ;; LDFD f36 = [AOFFSET], 1 * SIZE ;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -