⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemm_kernel.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 5 页
字号:
	(p3) FMA	f104  = f40, f61, f104	// A1 * B6	nop	__LINE__	}	;;/* 36 */	{ .mfi	(p4) LDFPD	f54, f55 = [BOFFSET], 2 * SIZE	(p3) FMA	f112  = f40, f62, f112	// A1 * B7	nop	__LINE__	}	{ .mfi	nop	__LINE__	(p3) FMA	f120  = f40, f63, f120	// A1 * B8	nop	__LINE__	}	;;/* 37 */	{ .mfi	(p4) LDFPD	f34, f35 = [AOFFSET], 2 * SIZE	(p3) FMA	f65   = f41, f56, f65	// A2 * B1	nop	__LINE__	}	{ .mfi	nop	__LINE__	(p3) FMA	f73   = f41, f57, f73	// A2 * B2	nop	__LINE__	}	;;/* 38 */	{ .mfi	(p4) LDFPD	f36, f37 = [AOFFSET], 2 * SIZE	(p3) FMA	f81   = f41, f58, f81	// A2 * B3	nop	__LINE__	}	{ .mfi	nop	__LINE__	(p3) FMA	f89   = f41, f59, f89	// A2 * B4	nop	__LINE__	}	;;/* 39 */	{ .mfi	(p4) LDFPD	f38, f39 = [AOFFSET], 2 * SIZE	(p3) FMA	f97   = f41, f60, f97	// A2 * B5	nop	__LINE__	}	{ .mfi	nop	__LINE__	(p3) FMA	f105  = f41, f61, f105	// A2 * B6	nop	__LINE__	}	;;/* 40 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f6  = [C1 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f113  = f41, f62, f113	// A2 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f7  = [C9 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f121  = f41, f63, f121	// A2 * B8	nop	__LINE__	}	;; /* 41 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f10 = [C1 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f66   = f42, f56, f66	// A3 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f11 = [C9 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f74   = f42, f57, f74	// A3 * B2	nop	__LINE__	}	;;/* 42 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f12 = [C1 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f82   = f42, f58, f82	// A3 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f13 = [C9 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f90   = f42, f59, f90	// A3 * B4	nop	__LINE__	}	;;/* 43 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f14 = [C1 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f98   = f42, f60, f98	// A3 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f15 = [C9 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f106  = f42, f61, f106	// A3 * B6	nop	__LINE__	}	;;/* 44 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f16 = [C2 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f114  = f42, f62, f114	// A3 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f17 = [C10], SIZE#else	nop	__LINE__#endif	(p3) FMA	f122  = f42, f63, f122	// A3 * B8	nop	__LINE__	}	;;/* 45 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f18 = [C2 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f67   = f43, f56, f67	// A4 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f19 = [C10], SIZE#else	nop	__LINE__#endif	(p3) FMA	f75   = f43, f57, f75	// A4 * B2	nop	__LINE__	}	;;/* 46 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f20 = [C2 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f83   = f43, f58, f83	// A4 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f21 = [C10], SIZE#else	nop	__LINE__#endif	(p3) FMA	f91   = f43, f59, f91	// A4 * B4	nop	__LINE__	}	;;/* 47 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f22 = [C2 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f99   = f43, f60, f99	// A4 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f23 = [C10], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f107  = f43, f61, f107	// A4 * B6	nop	__LINE__	}	;;/* 48 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f24 = [C3 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f115  = f43, f62, f115	// A4 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f25 = [C11], SIZE#else	nop	__LINE__#endif	(p3) FMA	f123  = f43, f63, f123	// A4 * B8	nop	__LINE__	}	;;/* 49 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f26 = [C3 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f68   = f44, f56, f68	// A5 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f27 = [C11], SIZE#else	nop	__LINE__#endif	(p3) FMA	f76   = f44, f57, f76	// A5 * B2	nop	__LINE__	}	;;/* 50 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f28 = [C3 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f84   = f44, f58, f84	// A5 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f29 = [C11], SIZE#else	nop	__LINE__#endif	(p3) FMA	f92   = f44, f59, f92	// A5 * B4	nop	__LINE__	}	;;/* 51 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f30 = [C3 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f100  = f44, f60, f100	// A5 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f31 = [C11], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f108  = f44, f61, f108	// A5 * B6	nop	__LINE__	}	;;/* 52 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f32 = [C4 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f116  = f44, f62, f116	// A5 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f33 = [C12], SIZE#else	nop	__LINE__#endif	(p3) FMA	f124  = f44, f63, f124	// A5 * B8	nop	__LINE__	}	;;/* 53 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f34 = [C4 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f69   = f45, f56, f69	// A6 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f35 = [C12], SIZE#else	nop	__LINE__#endif	(p3) FMA	f77   = f45, f57, f77	// A6 * B2	nop	__LINE__	}	;;/* 54 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f36 = [C4 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f85   = f45, f58, f85	// A6 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f37 = [C12], SIZE#else	nop	__LINE__#endif	(p3) FMA	f93   = f45, f59, f93	// A6 * B4	nop	__LINE__	}	;;/* 55 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f38 = [C4 ], -3 * SIZE#else	nop	__LINE__#endif 	(p3) FMA	f101  = f45, f60, f101	// A6 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f39 = [C12], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f109  = f45, f61, f109	// A6 * B6	nop	__LINE__	}	;;/* 56 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f48 = [C5 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f117  = f45, f62, f117	// A6 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f49 = [C13], SIZE#else	nop	__LINE__#endif	(p3) FMA	f125  = f45, f63, f125	// A6 * B8	nop	__LINE__	}	;;/* 57 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f50 = [C5 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f70   = f46, f56, f70	// A7 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f51 = [C13], SIZE#else	nop	__LINE__#endif	(p3) FMA	f78   = f46, f57, f78	// A7 * B2	nop	__LINE__	}	;;/* 58 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f52 = [C5 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f86   = f46, f58, f86	// A7 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f53 = [C13], SIZE#else	nop	__LINE__#endif	(p3) FMA	f94   = f46, f59, f94	// A7 * B4	nop  __LINE__	}	;;/* 59 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f54 = [C5 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f102  = f46, f60, f102	// A7 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f55 = [C13], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f110  = f46, f61, f110	// A7 * B6	nop	__LINE__	}	;;/* 60 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f40 = [C6 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f118  = f46, f62, f118	// A7 * B7	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f41 = [C14], SIZE#else	nop	__LINE__#endif	(p3) FMA	f126  = f46, f63, f126	// A7 * B8	nop	__LINE__	}	;;/* 61 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f42 = [C6 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f71   = f47, f56, f71	// A8 * B1	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f43 = [C14], SIZE#else	nop	__LINE__#endif	(p3) FMA	f79   = f47, f57, f79	// A8 * B2	nop	__LINE__	}	;;/* 62 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f44 = [C6 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f87   = f47, f58, f87	// A8 * B3	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f45 = [C14], SIZE#else	nop	__LINE__#endif	(p3) FMA	f95   = f47, f59, f95	// A8 * B4	nop	__LINE__	}	;;/* 63 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f59 = [C6 ], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f103  = f47, f60, f103	// A8 * B5	nop	__LINE__	}	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f60 = [C14], -3 * SIZE#else	nop	__LINE__#endif	(p3) FMA	f111  = f47, f61, f111	// A8 * B6	nop	__LINE__	}	;;/* 64 */	{ .mfi#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f61 = [C7 ], SIZE#else	nop	__LINE__#endif	(p3) FMA	f119  = f47, f62, f119	// A8 * B7	adds	L = -1, L	}	{ .mfb#if! defined(TRMMKERNEL) && !defined(BETAZERO)	(p5) LDFD	f62 = [C15], SIZE#else	nop	__LINE__#endif	(p3) FMA	f127  = f47, f63, f127	// A8 * B8	br.cloop.sptk.few .L012	}	;;.L013:#if! defined(TRMMKERNEL) && !defined(BETAZERO)	{ .mfi	(p5) LDFD	f63 = [C7 ], SIZE	FMA	f64  = ALPHA, f64, f6	cmp.ne	p6, p0 = 1, I	}	{ .mfb	(p5) LDFD	f6  = [C15], SIZE	FMA	f68  = ALPHA, f68, f7	nop	__LINE__	}	;;	{ .mfi	(p5) LDFD	f7  = [C7 ], SIZE	FMA	f65  = ALPHA, f65, f10	adds	I = -1, I	}	{ .mfb	(p5) LDFD	f10 = [C15], SIZE	FMA	f69  = ALPHA, f69, f11	nop	__LINE__	}	;;	{ .mfb	(p5) LDFD	f11 = [C7 ], -3 * SIZE	FMA	f66  = ALPHA, f66, f12	nop	__LINE__	}	{ .mfb	(p5) LDFD	f12 = [C15], -3 * SIZE	FMA	f70  = ALPHA, f70, f13	nop	__LINE__	}	;;	{ .mfb	LDFD	f13  = [C8 ], SIZE	FMA	f67  = ALPHA, f67, f14	nop	__LINE__	}	{ .mfb	LDFD	f14  = [C16], SIZE	FMA	f71  = ALPHA, f71, f15	nop	__LINE__	}	;;	{ .mmf	STFD	[C1 ] = f64, SIZE	STFD	[C9 ] = f68, SIZE	FMA	f72  = ALPHA, f72, f16	}	{ .mmf	LDFD	f15 = [C8 ], SIZE	LDFD	f16 = [C16], SIZE	FMA	f76  = ALPHA, f76, f17	}	;;	{ .mmf	STFD	[C1 ] = f65, SIZE	STFD	[C9 ] = f69, SIZE	FMA	f73  = ALPHA, f73, f18	}	{ .mmf	LDFD	f17 = [C8 ], SIZE	LDFD	f18 = [C16], SIZE	FMA	f77  = ALPHA, f77, f19	}	;;	{ .mmf	STFD	[C1 ] = f66, SIZE	STFD	[C9 ] = f70, SIZE	FMA	f74  = ALPHA, f74, f20	}	{ .mmf	LDFD	f19 = [C8 ], -3 * SIZE	LDFD	f20 = [C16], -3 * SIZE	FMA	f78  = ALPHA, f78, f21	}	;;	{ .mfb	STFD	[C1 ] = f67, 5 * SIZE	FMA	f75  = ALPHA, f75, f22	nop	__LINE__	}	{ .mfb	STFD	[C9 ] = f71, 5 * SIZE	FMA	f79  = ALPHA, f79, f23	nop	__LINE__	}	;;	{ .mfb	STFD	[C2 ] = f72, SIZE	FMA	f80  = ALPHA, f80, f24	nop	__LINE__	}	{ .mfb	STFD	[C10] = f76, SIZE	FMA	f84  = ALPHA, f84, f25	nop	__LINE__	}	;;	{ .mfb	STFD	[C2 ] = f73, SIZE	FMA	f81  = ALPHA, f81, f26	nop	__LINE__	}	{ .mfb	STFD	[C10] = f77, SIZE	FMA	f85  = ALPHA, f85, f27	nop	__LINE__	}	;;	{ .mfb	STFD	[C2 ] = f74, SIZE	FMA	f82  = ALPHA, f82, f28	nop	__LINE__	}	{ .mfb	STFD	[C10] = f78, SIZE	FMA	f86  = ALPHA, f86, f29	nop	__LINE__	}	;;	{ .mfb	STFD	[C2 ] = f75, 5 * SIZE	FMA	f83  = ALPHA, f83, f30	nop	__LINE__	}	{ .mfb	STFD	[C10] = f79, 5 * SIZE	FMA	f87  = ALPHA, f87, f31	nop	__LINE__	}	;;	{ .mfb	STFD	[C3 ] = f80, SIZE	FMA	f88  = ALPHA, f88, f32	nop	__LINE__	}	{ .mfb	STFD	[C11] = f84, SIZE	FMA	f92  = ALPHA, f92, f33	nop	__LINE__	}	;;	{ .mfb	STFD	[C3 ] = f81, SIZE	FMA	f89  = ALPHA, f89, f34	nop	__LINE__	}	{ .mfb	STFD	[C11] = f85, SIZE	FMA	f93  = ALPHA, f93, f35	nop	__LINE__	}	;;	{ .mfb	STFD	[C3 ] = f82, SIZE	FMA	f90  = ALPHA, f90, f36	nop	__LINE__	}	{ .mfb	STFD	[C11] = f86, SIZE	FMA	f94  = ALPHA, f94, f37	nop	__LINE__	}	;;	{ .mfb	STFD	[C3 ] = f83, 5 * SIZE	FMA	f91  = ALPHA, f91, f38	nop	__LINE__	}	{ .mfb	STFD	[C11] = f87, 5 * SIZE	FMA	f95  = ALPHA, f95, f39	nop	__LINE__	}	;;	{ .mfb	STFD	[C4 ] = f88, SIZE	FMA	f96  = ALPHA, f96,  f48	nop	__LINE__	}	{ .mfb	STFD	[C12] = f92, SIZE	FMA	f100 = ALPHA, f100, f49	nop	__LINE__	}	;;	{ .mfb	STFD	[C4 ] = f89, SIZE	FMA	f97  = ALPHA, f97,  f50	nop	__LINE__	}	{ .mfb	STFD	[C12] = f93, SIZE	FMA	f101 = ALPHA, f101, f51	nop	__LINE__	}	;;	{ .mfb	STFD	[C4 ] = f90, SIZE	FMA	f98  = ALPHA, f98,  f52	nop	__LINE__	}	{ .mfb	STFD	[C12] = f94, SIZE	FMA	f102 = ALPHA, f102, f53	nop	__LINE__	}	;;	{ .mfb	STFD	[C4 ] = f91, 5 * SIZE	FMA	f99  = ALPHA, f99,  f54	nop	__LINE__	}	{ .mfb	STFD	[C12] = f95, 5 * SIZE	FMA	f103 = ALPHA, f103, f55	nop	__LINE__	}	;;	{ .mfb	STFD	[C5 ] = f96,  SIZE	FMA	f104 = ALPHA, f104, f40	nop	__LINE__	}	{ .mfb	STFD	[C13] = f100, SIZE	FMA	f108 = ALPHA, f108, f41	nop	__LINE__	}	;;	{ .mfb	STFD	[C5 ] = f97,  SIZE	FMA	f105 = ALPHA, f105, f42	nop	__LINE__	}	{ .mfb	STFD	[C13] = f101, SIZE	FMA	f109 = ALPHA, f109, f43	nop	__LINE__	}	;;	{ .mfb	STFD	[C5 ] = f98,  SIZE	FMA	f106 = ALPHA, f106, f44	nop	__LINE__	}	{ .mfb	STFD	[C13] = f102, SIZE	FMA	f110 = ALPHA, f110, f45	nop	__LINE__	}	;;	{ .mfb	STFD	[C5 ] = f99,  5 * SIZE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -