📄 ztrsm_kernel_lt.s
字号:
{ .mfb nop __LINE__ (p3) FMA_B f115 = f42, f63, f115 // A3 * B8 nop __LINE__ } ;;/* 45 */ { .mfb nop __LINE__ (p3) FMA f67 = f43, f56, f67 // A4 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f66 = f43, f57, f66 // A4 * B2 nop __LINE__ } ;;/* 46 */ { .mfb nop __LINE__ (p3) FMA f83 = f43, f58, f83 // A4 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f82 = f43, f59, f82 // A4 * B4 nop __LINE__ } ;;/* 47 */ { .mfb nop __LINE__ (p3) FMA f99 = f43, f60, f99 // A4 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f98 = f43, f61, f98 // A4 * B6 nop __LINE__ } ;;/* 48 */ { .mfb nop __LINE__ (p3) FMA f115 = f43, f62, f115 // A4 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f114 = f43, f63, f114 // A4 * B8 nop __LINE__ } ;;/* 49 */ { .mfb nop __LINE__ (p3) FMA f68 = f44, f56, f68 // A5 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f69 = f44, f57, f69 // A5 * B2 nop __LINE__ } ;;/* 50 */ { .mfb nop __LINE__ (p3) FMA f84 = f44, f58, f84 // A5 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f85 = f44, f59, f85 // A5 * B4 nop __LINE__ } ;;/* 51 */ { .mfb nop __LINE__ (p3) FMA f100 = f44, f60, f100 // A5 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f101 = f44, f61, f101 // A5 * B6 nop __LINE__ } ;;/* 52 */ { .mfb nop __LINE__ (p3) FMA f116 = f44, f62, f116 // A5 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f117 = f44, f63, f117 // A5 * B8 nop __LINE__ } ;;/* 53 */ { .mfb nop __LINE__ (p3) FMA f69 = f45, f56, f69 // A6 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f68 = f45, f57, f68 // A6 * B2 nop __LINE__ } ;;/* 54 */ { .mfb nop __LINE__ (p3) FMA f85 = f45, f58, f85 // A6 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f84 = f45, f59, f84 // A6 * B4 nop __LINE__ } ;;/* 55 */ { .mfb nop __LINE__ (p3) FMA f101 = f45, f60, f101 // A6 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f100 = f45, f61, f100 // A6 * B6 nop __LINE__ } ;;/* 56 */ { .mfb nop __LINE__ (p3) FMA f117 = f45, f62, f117 // A6 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f116 = f45, f63, f116 // A6 * B8 nop __LINE__ } ;;/* 57 */ { .mfb nop __LINE__ (p3) FMA f70 = f46, f56, f70 // A7 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f71 = f46, f57, f71 // A7 * B2 nop __LINE__ } ;;/* 58 */ { .mfb nop __LINE__ (p3) FMA f86 = f46, f58, f86 // A7 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f87 = f46, f59, f87 // A7 * B4 nop __LINE__ } ;;/* 59 */ { .mfb nop __LINE__ (p3) FMA f102 = f46, f60, f102 // A7 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f103 = f46, f61, f103 // A7 * B6 nop __LINE__ } ;;/* 60 */ { .mfb nop __LINE__ (p3) FMA f118 = f46, f62, f118 // A7 * B7 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_B f119 = f46, f63, f119 // A7 * B8 nop __LINE__ } ;;/* 61 */ { .mfb nop __LINE__ (p3) FMA f71 = f47, f56, f71 // A8 * B1 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f70 = f47, f57, f70 // A8 * B2 nop __LINE__ } ;;/* 62 */ { .mfb nop __LINE__ (p3) FMA f87 = f47, f58, f87 // A8 * B3 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f86 = f47, f59, f86 // A8 * B4 nop __LINE__ } ;;/* 63 */ { .mfb nop __LINE__ (p3) FMA f103 = f47, f60, f103 // A8 * B5 nop __LINE__ } { .mfb nop __LINE__ (p3) FMA_A f102 = f47, f61, f102 // A8 * B6 nop __LINE__ } ;;/* 64 */ { .mfi nop __LINE__ (p3) FMA f119 = f47, f62, f119 // A8 * B7 adds L = -1, L } { .mfb nop __LINE__ (p3) FMA_A f118 = f47, f63, f118 // A8 * B8 br.cloop.sptk.few .L012 } ;;.L018:#if defined(LN) || defined(RT)#ifdef LN adds r2 = -4, KK#else adds r2 = -4, KK#endif ;; shladd r2 = r2, ZBASE_SHIFT, r0 ;; shladd AOFFSET = r2, 2, AORIG shladd BOFFSET = r2, 2, B ;; #endif#if defined(LN) || defined(LT) LDFPD f72, f73 = [BOFFSET], 2 * SIZE ;; LDFPD f74, f75 = [BOFFSET], 2 * SIZE ;; LDFPD f76, f77 = [BOFFSET], 2 * SIZE ;; LDFPD f78, f79 = [BOFFSET], 2 * SIZE ;; LDFPD f88, f89 = [BOFFSET], 2 * SIZE ;; LDFPD f90, f91 = [BOFFSET], 2 * SIZE ;; LDFPD f92, f93 = [BOFFSET], 2 * SIZE ;; { .mfi LDFPD f94, f95 = [BOFFSET], 2 * SIZE FSUB f64 = f72, f64 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f65 = f73, f65 nop __LINE__ } ;; { .mfi LDFPD f104, f105 = [BOFFSET], 2 * SIZE FSUB f80 = f74, f80 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f81 = f75, f81 nop __LINE__ } ;; { .mfi LDFPD f106, f107 = [BOFFSET], 2 * SIZE FSUB f96 = f76, f96 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f97 = f77, f97 nop __LINE__ } ;; { .mfi LDFPD f108, f109 = [BOFFSET], 2 * SIZE FSUB f112 = f78, f112 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f113 = f79, f113 nop __LINE__ } ;; { .mfi LDFPD f110, f111 = [BOFFSET], 2 * SIZE FSUB f66 = f88, f66 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f67 = f89, f67 nop __LINE__ } ;; { .mfi LDFPD f120, f121 = [BOFFSET], 2 * SIZE FSUB f82 = f90, f82 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f83 = f91, f83 nop __LINE__ } ;; { .mfi LDFPD f122, f123 = [BOFFSET], 2 * SIZE FSUB f98 = f92, f98 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f99 = f93, f99 nop __LINE__ } ;; { .mfi LDFPD f124, f125 = [BOFFSET], 2 * SIZE FSUB f114 = f94, f114 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f115 = f95, f115 nop __LINE__ } ;; { .mfi LDFPD f126, f127 = [BOFFSET] FSUB f68 = f104, f68 adds BOFFSET = -30 * SIZE, BOFFSET } { .mfi nop __LINE__ FSUB_A f69 = f105, f69#ifdef LN adds AOFFSET = 30 * SIZE, AOFFSET#else nop __LINE__#endif } ;; { .mfi LDFPD f72, f73 = [AOFFSET] FSUB f84 = f106, f84#ifdef LN adds AOFFSET = - 2 * SIZE, AOFFSET#else adds AOFFSET = 2 * SIZE, AOFFSET#endif } { .mfi nop __LINE__ FSUB_A f85 = f107, f85 nop __LINE__ } ;; { .mfi LDFPD f74, f75 = [AOFFSET] FSUB f100 = f108, f100#ifdef LN adds AOFFSET = - 2 * SIZE, AOFFSET#else adds AOFFSET = 2 * SIZE, AOFFSET#endif } { .mfi nop __LINE__ FSUB_A f101 = f109, f101 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f116 = f110, f116 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f117 = f111, f117 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f70 = f120, f70 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f71 = f121, f71 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f86 = f122, f86 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f87 = f123, f87 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f102 = f124, f102 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f103 = f125, f103 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f118 = f126, f118 nop __LINE__ } { .mfi nop __LINE__ FSUB_A f119 = f127, f119 nop __LINE__ } ;;#else LDFPD f72, f73 = [AOFFSET], 2 * SIZE ;; LDFPD f74, f75 = [AOFFSET], 2 * SIZE ;; LDFPD f76, f77 = [AOFFSET], 2 * SIZE ;; LDFPD f78, f79 = [AOFFSET], 2 * SIZE ;; LDFPD f88, f89 = [AOFFSET], 2 * SIZE ;; LDFPD f90, f91 = [AOFFSET], 2 * SIZE ;; { .mfi LDFPD f92, f93 = [AOFFSET], 2 * SIZE FSUB f64 = f72, f64 nop __LINE__ } { .mfi nop __LINE__ FSUB f65 = f73, f65 nop __LINE__ } ;; { .mfi LDFPD f94, f95 = [AOFFSET], 2 * SIZE FSUB f66 = f74, f66 nop __LINE__ } { .mfi nop __LINE__ FSUB f67 = f75, f67 nop __LINE__ } ;; { .mfi LDFPD f104, f105 = [AOFFSET], 2 * SIZE FSUB f68 = f76, f68 nop __LINE__ } { .mfi nop __LINE__ FSUB f69 = f77, f69 nop __LINE__ } ;; { .mfi LDFPD f106, f107 = [AOFFSET], 2 * SIZE FSUB f70 = f78, f70 nop __LINE__ } { .mfi nop __LINE__ FSUB f71 = f79, f71 nop __LINE__ } ;; { .mfi LDFPD f108, f109 = [AOFFSET], 2 * SIZE FSUB f80 = f88, f80 nop __LINE__ } { .mfi nop __LINE__ FSUB f81 = f89, f81 nop __LINE__ } ;; { .mfi LDFPD f110, f111 = [AOFFSET], 2 * SIZE FSUB f82 = f90, f82 nop __LINE__ } { .mfi nop __LINE__ FSUB f83 = f91, f83 nop __LINE__ } ;; { .mfi LDFPD f120, f121 = [AOFFSET], 2 * SIZE FSUB f84 = f92, f84 nop __LINE__ } { .mfi nop __LINE__ FSUB f85 = f93, f85 nop __LINE__ } ;; { .mfi LDFPD f122, f123 = [AOFFSET], 2 * SIZE FSUB f86 = f94, f86 nop __LINE__ } { .mfi nop __LINE__ FSUB f87 = f95, f87 nop __LINE__ } ;; { .mfi LDFPD f124, f125 = [AOFFSET], 2 * SIZE FSUB f96 = f104, f96 nop __LINE__ } { .mfi nop __LINE__ FSUB f97 = f105, f97 nop __LINE__ } ;; { .mfi LDFPD f126, f127 = [AOFFSET] FSUB f98 = f106, f98 adds AOFFSET = -30 * SIZE, AOFFSET } { .mfi nop __LINE__ FSUB f99 = f107, f99#ifdef RT adds BOFFSET = 30 * SIZE, BOFFSET#else nop __LINE__#endif } ;; { .mfi LDFPD f72, f73 = [BOFFSET] FSUB f100 = f108, f100#ifdef RN adds BOFFSET = 2 * SIZE, BOFFSET#else adds BOFFSET = - 2 * SIZE, BOFFSET#endif } { .mfi nop __LINE__ FSUB f101 = f109, f101 nop __LINE__ } ;; { .mfi LDFPD f74, f75 = [BOFFSET] FSUB f102 = f110, f102#ifdef RN adds BOFFSET = 2 * SIZE, BOFFSET#else adds BOFFSET = - 2 * SIZE, BOFFSET#endif } { .mfi nop __LINE__ FSUB f103 = f111, f103 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f112 = f120, f112 nop __LINE__ } { .mfi nop __LINE__ FSUB f113 = f121, f113 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f114 = f122, f114 nop __LINE__ } { .mfi nop __LINE__ FSUB f115 = f123, f115 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f116 = f124, f116 nop __LINE__ } { .mfi nop __LINE__ FSUB f117 = f125, f117 nop __LINE__ } ;; { .mfi nop __LINE__ FSUB f118 = f126, f118 nop __LINE__ } { .mfi nop __LINE__ FSUB f119 = f127, f119 nop __LINE__ } ;;#endif#ifdef LN { .mfi LDFPD f76, f77 = [AOFFSET] FMPY f32 = f72, f70 adds AOFFSET = - 2 * SIZE, AOFFSET } { .mfi nop __LINE__ FMPY f36 = f72, f102 nop __LINE__ } ;; { .mfi LDFPD f78, f79 = [AOFFSET] FMPY f33 = f73, f70 adds AOFFSET = - 4 * SIZE, AOFFSET } { .mfi nop __LINE__ FMPY f37 = f73, f102 nop __LINE__ } ;; { .mfi LDFPD f88, f89 = [AOFFSET] FMPY f34 = f72, f86 adds AOFFSET = - 2 * SIZE, AOFFSET } { .mfi nop __LINE__ FMPY f38 = f72, f118 nop __LINE__ } ;; { .mfi LDFPD f90, f91 = [AOFFSET] FMPY f35 = f73, f86 adds AOFFSET = - 2 * SIZE, AOFFSET } { .mfi nop __LINE__ FMPY f39 = f73, f118 nop __LINE__ } ;; { .mfi LDFPD f92, f93 = [AOFFSET] FMA_C f70 = f73, f71, f32 adds AOFFSET = - 6 * SIZE, AOFFSET } { .mfi nop __LINE__ FMA_C f102 = f73, f103, f36 adds C1 = -2 * SIZE, C1 } ;; { .mfi LDFPD f104, f105 = [AOFFSET] FMA_D f71 = f72, f71, f33 adds AOFFSET = - 2 * SIZE, AOFFSET } { .mfi nop __LINE__ FMA_D f103 = f72, f103, f37 adds C2 = -2 * SIZE, C2 } ;; { .mfi LDFPD f106, f107 = [AOFFSET] FMA_C f86 = f73, f87, f34 adds AOFFSET = - 8 * SIZE, AOFFSET } { .mfi nop __LINE__ FMA_C f118 = f73, f119, f38 adds C3 = -2 * SIZE, C3 } ;; { .mfi LDFPD f120, f121 = [AOFFSET] FMA_D f87 = f72, f87, f35 adds BOFFSET2 = 28 * SIZE, BOFFSET } { .mfi nop __LINE__ FMA_D f119 = f72, f119, f39 adds BOFFSET = 24 * SIZE, BOFFSET } ;; { .mfi STFD [BOFFSET] = f70, SIZE FNMA f68 = f74, f70, f68 adds C4 = -2 * SIZE, C4 } { .mfi STFD [BOFFSET2] = f102, SIZE FNMA f100 = f74, f102, f100 nop __LINE__ } ;; { .mfi STFD [BOFFSET] = f71, SIZE FMA_A f69 = f75, f70, f69 nop __LINE__ } { .mfi STFD [BOFFSET2] = f103, SIZE FMA_A f101 = f75, f102, f101 nop __LINE__ } ;; { .mfi STFD [BOFFSET] = f86, SIZE FNMA f84 = f74, f86, f84 nop __LINE__ } { .mfi STFD [BOFFSET2] = f118, SIZE FNMA f116 = f74, f118, f116 nop __LINE__ } ;; { .mfi STFD [BOFFSET] = f87, -11 * SIZE FMA_A f85 = f75, f86, f85 nop __LINE__ } { .mfi STFD [BOFFSET2] = f119, -11 * SIZE FMA_A f117 = f75, f118, f117 nop __LINE__ } ;; { .mfi STFD [C1 ] = f70, SIZE FMA_B f68 = f75, f71, f68 nop __LINE__ } { .mfi STFD [C3 ] = f102, SIZE FMA_B f100 = f75, f103, f100 nop __LINE__ } ;; { .mfi STFD [C1 ] = f71, -3 * SIZE FNMA f69 = f74, f71, f69 nop __LINE__ } { .mfi STFD [C3 ] = f103, -3 * SIZE FNMA f101 = f74, f103, f101 nop __LINE__ } ;; { .mfi STFD [C2 ] = f86, SIZE FMA_B f84 = f75, f87, f84 nop __LINE__ } { .mfi STFD [C4 ] = f118, SIZE FMA_B f116 = f75, f119, f116 nop __LINE__ } ;; { .mfi STFD [C2 ] = f87, -3 * SIZE FNMA f85 = f74, f87, f85 nop __LINE__ } { .mfi STFD [C4 ] = f119, -3 * SIZE FNMA f117 = f74, f119, f117 nop __LINE__ } ;; { .mfi nop __LINE__ FNMA f66 = f76, f70, f66 nop __LINE__ } { .mfi nop __LINE__ FNMA f98 = f76, f102, f98 nop __LINE__ } ;; { .mfi nop __LINE__ FMA_A f67 = f77, f70, f67 nop __LINE__ } { .mfi nop __LINE__ FMA_A f99 = f77, f102, f99 nop __LINE__ } ;; { .mfi nop __LINE__ FNMA f82 = f76, f86, f82 nop __LINE__ } { .mfi nop __LINE__ FNMA f114 = f76, f118, f114 nop __LINE__ } ;; { .mfi nop __LINE__ FMA_A f83 = f77, f86, f83 nop __LINE__ } { .mfi nop __LINE__ FMA_A f115 = f77, f118, f115 nop __LINE__ } ;; { .mfi nop __LINE__ FMA_B f66 = f77, f71, f66 nop __LINE__ } { .mfi nop __LINE__ FMA_B f98 = f77, f103, f98 nop __LINE__ } ;; { .mfi nop __LINE__ FNMA f67 = f76, f71, f67 nop __LINE__ } { .mfi nop __LINE__ FNMA f99 = f76, f103, f99 nop __LINE__ } ;; { .mfi nop __LINE__ FMA_B f82 = f77, f87, f82 nop __LINE__ } { .mfi nop __LINE__ FMA_B f114 = f77, f119, f114
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -