⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trsm_kernel_lt.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	nop	__LINE__	(p3) FMA	f105  = f41, f61, f105	// A2 * B6	nop	__LINE__	}	;;/* 40 */	{ .mfb	nop	__LINE__	(p3) FMA	f113  = f41, f62, f113	// A2 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f121  = f41, f63, f121	// A2 * B8	nop	__LINE__	}	;; /* 41 */	{ .mfb	nop	__LINE__	(p3) FMA	f66   = f42, f56, f66	// A3 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f74   = f42, f57, f74	// A3 * B2	nop	__LINE__	}	;;/* 42 */	{ .mfb	nop	__LINE__	(p3) FMA	f82   = f42, f58, f82	// A3 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f90   = f42, f59, f90	// A3 * B4	nop	__LINE__	}	;;/* 43 */	{ .mfb	nop	__LINE__	(p3) FMA	f98   = f42, f60, f98	// A3 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f106  = f42, f61, f106	// A3 * B6	nop	__LINE__	}	;;/* 44 */	{ .mfb	nop	__LINE__	(p3) FMA	f114  = f42, f62, f114	// A3 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f122  = f42, f63, f122	// A3 * B8	nop	__LINE__	}	;;/* 45 */	{ .mfb	nop	__LINE__	(p3) FMA	f67   = f43, f56, f67	// A4 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f75   = f43, f57, f75	// A4 * B2	nop	__LINE__	}	;;/* 46 */	{ .mfb	nop	__LINE__	(p3) FMA	f83   = f43, f58, f83	// A4 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f91   = f43, f59, f91	// A4 * B4	nop	__LINE__	}	;;/* 47 */	{ .mfb	nop	__LINE__	(p3) FMA	f99   = f43, f60, f99	// A4 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f107  = f43, f61, f107	// A4 * B6	nop	__LINE__	}	;;/* 48 */	{ .mfb	nop	__LINE__	(p3) FMA	f115  = f43, f62, f115	// A4 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f123  = f43, f63, f123	// A4 * B8	nop	__LINE__	}	;;/* 49 */	{ .mfb	nop	__LINE__	(p3) FMA	f68   = f44, f56, f68	// A5 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f76   = f44, f57, f76	// A5 * B2	nop	__LINE__	}	;;/* 50 */	{ .mfb	nop	__LINE__	(p3) FMA	f84   = f44, f58, f84	// A5 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f92   = f44, f59, f92	// A5 * B4	nop	__LINE__	}	;;/* 51 */	{ .mfb	nop	__LINE__	(p3) FMA	f100  = f44, f60, f100	// A5 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f108  = f44, f61, f108	// A5 * B6	nop	__LINE__	}	;;/* 52 */	{ .mfb	nop	__LINE__	(p3) FMA	f116  = f44, f62, f116	// A5 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f124  = f44, f63, f124	// A5 * B8	nop	__LINE__	}	;;/* 53 */	{ .mfb	nop	__LINE__	(p3) FMA	f69   = f45, f56, f69	// A6 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f77   = f45, f57, f77	// A6 * B2	nop	__LINE__	}	;;/* 54 */	{ .mfb	nop	__LINE__	(p3) FMA	f85   = f45, f58, f85	// A6 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f93   = f45, f59, f93	// A6 * B4	nop	__LINE__	}	;;/* 55 */	{ .mfb	nop	__LINE__ 	(p3) FMA	f101  = f45, f60, f101	// A6 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f109  = f45, f61, f109	// A6 * B6	nop	__LINE__	}	;;/* 56 */	{ .mfb	nop	__LINE__	(p3) FMA	f117  = f45, f62, f117	// A6 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f125  = f45, f63, f125	// A6 * B8	nop	__LINE__	}	;;/* 57 */	{ .mfb	nop	__LINE__	(p3) FMA	f70   = f46, f56, f70	// A7 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f78   = f46, f57, f78	// A7 * B2	nop	__LINE__	}	;;/* 58 */	{ .mfb	nop	__LINE__	(p3) FMA	f86   = f46, f58, f86	// A7 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f94   = f46, f59, f94	// A7 * B4	nop	__LINE__	}	;;/* 59 */	{ .mfb	nop	__LINE__	(p3) FMA	f102  = f46, f60, f102	// A7 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f110  = f46, f61, f110	// A7 * B6	nop	__LINE__	}	;;/* 60 */	{ .mfb	nop	__LINE__	(p3) FMA	f118  = f46, f62, f118	// A7 * B7	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f126  = f46, f63, f126	// A7 * B8	nop	__LINE__	}	;;/* 61 */	{ .mfb	nop	__LINE__	(p3) FMA	f71   = f47, f56, f71	// A8 * B1	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f79   = f47, f57, f79	// A8 * B2	nop	__LINE__	}	;;/* 62 */	{ .mfb	nop	__LINE__	(p3) FMA	f87   = f47, f58, f87	// A8 * B3	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f95   = f47, f59, f95	// A8 * B4	nop	__LINE__	}	;;/* 63 */	{ .mfb	nop	__LINE__	(p3) FMA	f103  = f47, f60, f103	// A8 * B5	nop	__LINE__	}	{ .mfb	nop	__LINE__	(p3) FMA	f111  = f47, f61, f111	// A8 * B6	nop	__LINE__	}	;;/* 64 */	{ .mfi	nop	__LINE__	(p3) FMA	f119  = f47, f62, f119	// A8 * B7	adds	L = -1, L	}	{ .mfb 	adds	AOFFSET2 = 4 * SIZE, AOFFSET	(p3) FMA	f127  = f47, f63, f127	// A8 * B8	br.cloop.sptk.few .L012	}	;;.L018:#ifdef LT	LDFPD	f32, f33 = [BOFFSET], 2 * SIZE	;;	LDFPD	f34, f35 = [BOFFSET], 2 * SIZE	;;	LDFPD	f36, f37 = [BOFFSET], 2 * SIZE	;;	LDFPD	f38, f39 = [BOFFSET], 2 * SIZE	;;	LDFPD	f40, f41 = [BOFFSET], 2 * SIZE	;;	LDFPD	f42, f43 = [BOFFSET], 2 * SIZE	;;	LDFPD	f44, f45 = [BOFFSET], 2 * SIZE	;;	LDFPD	f46, f47 = [BOFFSET], 2 * SIZE	;;	{ .mfi	LDFPD	f48, f49 = [BOFFSET], 2 * SIZE	FSUB	f64  = f32, f64	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f72  = f33, f72	nop	__LINE__	}	;;	{ .mfi	LDFPD	f50, f51 = [BOFFSET], 2 * SIZE	FSUB	f80  = f34, f80	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f88  = f35, f88	nop	__LINE__	}	;;	{ .mfi	LDFPD	f52, f53 = [BOFFSET], 2 * SIZE	FSUB	f96  = f36, f96	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f104 = f37, f104	nop	__LINE__	}	;;	{ .mfi	LDFPD	f54, f55 = [BOFFSET], 2 * SIZE	FSUB	f112 = f38, f112	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f120 = f39, f120	nop	__LINE__	}	;;	{ .mfi	LDFPD	f56, f57 = [BOFFSET], 2 * SIZE	FSUB	f65  = f40, f65	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f73  = f41, f73	nop	__LINE__	}	;;	{ .mfi	LDFPD	f58, f59 = [BOFFSET], 2 * SIZE	FSUB	f81  = f42, f81	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f89  = f43, f89	nop	__LINE__	}	;;	{ .mfi	LDFPD	f60, f61 = [BOFFSET], 2 * SIZE	FSUB	f97  = f44, f97	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f105 = f45, f105	nop	__LINE__	}	;;	{ .mfi	LDFPD	f62, f63 = [BOFFSET], 2 * SIZE	FSUB	f113 = f46, f113	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f121 = f47, f121	nop	__LINE__	}	;;	{ .mfi	LDFPD	f32, f33 = [BOFFSET], 2 * SIZE	FSUB	f66  = f48, f66	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f74  = f49, f74	nop	__LINE__	}	;;	{ .mfi	LDFPD	f34, f35 = [BOFFSET], 2 * SIZE	FSUB	f82  = f50, f82	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f90  = f51, f90	nop	__LINE__	}	;;	{ .mfi	LDFPD	f36, f37 = [BOFFSET], 2 * SIZE	FSUB	f98  = f52, f98	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f106 = f53, f106	nop	__LINE__	}	;;	{ .mfi	LDFPD	f38, f39 = [BOFFSET], 2 * SIZE	FSUB	f114 = f54, f114	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f122 = f55, f122	nop	__LINE__	}	;;	{ .mfi	LDFPD	f40, f41 = [BOFFSET], 2 * SIZE	FSUB	f67  = f56, f67	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f75  = f57, f75	nop	__LINE__	}	;;	{ .mfi	LDFPD	f42, f43 = [BOFFSET], 2 * SIZE	FSUB	f83  = f58, f83	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f91  = f59, f91	nop	__LINE__	}	;;	{ .mfi	LDFPD	f44, f45 = [BOFFSET], 2 * SIZE	FSUB	f99  = f60, f99	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f107 = f61, f107	nop	__LINE__	}	;;	{ .mfi	LDFPD	f46, f47 = [BOFFSET], 2 * SIZE	FSUB	f115 = f62, f115	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f123 = f63, f123	nop	__LINE__	}	;;	{ .mfi	LDFPD	f48, f49 = [BOFFSET], 2 * SIZE	FSUB	f68  = f32, f68	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f76  = f33, f76	nop	__LINE__	}	;;	{ .mfi	LDFPD	f50, f51 = [BOFFSET], 2 * SIZE	FSUB	f84  = f34, f84	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f92  = f35, f92	nop	__LINE__	}	;;	{ .mfi	LDFPD	f52, f53 = [BOFFSET], 2 * SIZE	FSUB	f100 = f36, f100	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f108 = f37, f108	nop	__LINE__	}	;;	{ .mfi	LDFPD	f54, f55 = [BOFFSET], 2 * SIZE	FSUB	f116 = f38, f116	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f124 = f39, f124	nop	__LINE__	}	;;	{ .mfi	LDFPD	f56, f57 = [BOFFSET], 2 * SIZE	FSUB	f69  = f40, f69	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f77  = f41, f77	nop	__LINE__	}	;;	{ .mfi	LDFPD	f58, f59 = [BOFFSET], 2 * SIZE	FSUB	f85  = f42, f85	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f93  = f43, f93	nop	__LINE__	}	;;	{ .mfi	LDFPD	f60, f61 = [BOFFSET], 2 * SIZE	FSUB	f101 = f44, f101	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f109 = f45, f109	nop	__LINE__	}	;;	{ .mfi	LDFPD	f62, f63 = [BOFFSET]	FSUB	f117 = f46, f117	adds	BOFFSET = -62 * SIZE, BOFFSET	}	{ .mfi	nop	__LINE__	FSUB	f125 = f47, f125	nop	__LINE__	}	;;	{ .mfi	nop	__LINE__	FSUB	f70  = f48, f70	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f78  = f49, f78	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f86  = f50, f86	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f94  = f51, f94	nop	__LINE__	}	;;	{ .mfi	LDFPD	f32, f33 = [AOFFSET]	FSUB	f102 = f52, f102	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f110 = f53, f110	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f118 = f54, f118	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f126 = f55, f126	adds	AOFFSET =   2 * SIZE, AOFFSET	}	;;	{ .mfi	nop	__LINE__	FSUB	f71  = f56, f71	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f79  = f57, f79	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f87  = f58, f87	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f95  = f59, f95	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f103 = f60, f103	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f111 = f61, f111	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f119 = f62, f119	nop	__LINE__	}	{ .mfi	nop	__LINE__	FSUB	f127 = f63, f127	nop	__LINE__	}	;;	{ .mfi	LDFPD	f34, f35 = [AOFFSET], 2 * SIZE	FMPY	f64  = f64,  f32	nop	__LINE__	}	{ .mfi	nop	__LINE__	FMPY	f96  = f96,  f32 	adds	BOFFSET2 = 4 * SIZE, BOFFSET	}	;;	{ .mfi	LDFPD	f36, f37 = [AOFFSET], 2 * SIZE	FMPY	f72  = f72,  f32	nop	__LINE__	}	{ .mfi	nop	__LINE__	FMPY	f104 = f104, f32	nop	__LINE__	}	;;	{ .mfi	LDFPD	f38, f39 = [AOFFSET]	FMPY	f80  = f80,  f32	adds	AOFFSET = 3 * SIZE, AOFFSET	}	{ .mfi	nop	__LINE__	FMPY	f112 = f112, f32	nop	__LINE__	}	;;	{ .mfi	LDFD	f40 = [AOFFSET], 1 * SIZE	FMPY	f88  = f88,  f32	nop	__LINE__	}	{ .mfi	nop	__LINE__	FMPY	f120 = f120, f32	nop	__LINE__	}	;;	{ .mfi	LDFPD	f41, f42 = [AOFFSET], 2 * SIZE	FNMA	f65  = f64,  f33, f65	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f97  = f96,  f33, f97	nop	__LINE__	}	;;	{ .mfi	LDFPD	f43, f44 = [AOFFSET], 2 * SIZE	FNMA	f73  = f72,  f33, f73	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f105 = f104, f33, f105	nop	__LINE__	}	;;	{ .mfi	LDFPD	f45, f46 = [AOFFSET]	FNMA	f81  = f80,  f33, f81	adds	AOFFSET = 4 * SIZE, AOFFSET	}	{ .mfi	nop	__LINE__	FNMA	f113 = f112, f33, f113	nop	__LINE__	}	;;	{ .mfi	LDFPD	f47, f48 = [AOFFSET], 2 * SIZE	FNMA	f89  = f88,  f33, f89	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f121 = f120, f33, f121	nop	__LINE__	}	;;	{ .mfi	LDFPD	f49, f50 = [AOFFSET], 2 * SIZE	FNMA	f66  = f64,  f34, f66	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f98  = f96,  f34, f98	nop	__LINE__	}	;;	{ .mfi	LDFPD	f51, f52 = [AOFFSET]	FNMA	f74  = f72,  f34, f74	adds	AOFFSET = 5 * SIZE, AOFFSET	}	{ .mfi	nop	__LINE__	FNMA	f106 = f104, f34, f106	nop	__LINE__	}	;;	{ .mfi	LDFD	f53 = [AOFFSET], 1 * SIZE	FNMA	f82  = f80,  f34, f82	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f114 = f112, f34, f114	nop	__LINE__	}	;;	{ .mfi	LDFPD	f54, f55 = [AOFFSET], 2 * SIZE	FNMA	f90  = f88,  f34, f90	nop	__LINE__	}	{ .mfi	nop	__LINE__	FNMA	f122 = f120, f34, f122	nop	__LINE__	}	;;	{ .mfi	LDFPD	f56, f57 = [AOFFSET]	FNMA	f67  = f64,  f35, f67	adds	AOFFSET = 6 * SIZE, AOFFSET	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -