⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgemv_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
	(p14) FMA	f104 = f15, f78, f104	(p14) FMA	f105 = f15, f79, f105	(p15) FMA	f106 = f15, f87, f106	;;	(p13) STFD	[YST1] = f100, 1 * SIZE	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	(p6) br.cond.dptk .L11	;;	.align 16.L20:	{ .mmi	mov	YLD1 = YY	mov	YST1 = YY	tbit.z	p6, p0 = N, 2	}	;;	{ .mib	mov	AO1 = A	mov	pr.rot= 0	(p6) br.cond.dpnt .L30	}	;;	{ .mmi	LDFD	f8 = [X], INCX	(p8) LDFD	f106 = [YLD1], 1 * SIZE	add	AO2 = LDA, A	}	;;	{ .mmi	LDFD	f9 = [X], INCX	(p8) LDFD	f80 = [AO1], 1 * SIZE	shladd	AO4 = LDA, 1, AO2	}	;;	{ .mmi	LDFD	f10 = [X], INCX	(p8) LDFD	f81 = [AO2], 1 * SIZE	shladd	AO3 = LDA, 1, A	}	;;	{ .mmi	LDFD	f11 = [X], INCX	(p8) LDFD	f82 = [AO3], 1 * SIZE	}	;;	{ .mfi	(p8) LDFD	f83 = [AO4], 1 * SIZE	FMPY	f8  = ALPHA, f8	adds	PREB   = RPREFETCH * SIZE, YLD1	}	{ .mfi	adds	RPRE1  = RPREFETCH * SIZE, AO1	FMPY	f9  = ALPHA, f9	adds	RPRE2  = (RPREFETCH + 8) * SIZE, AO2	}	;;	FMPY	f10 = ALPHA, f10	shladd	A   = LDA, 2, A	FMPY	f11 = ALPHA, f11	;;	{ .mfi	adds	RPRE3  = RPREFETCH * SIZE, AO3	(p8) FMA	f106 = f8,  f80, f106	mov	ar.ec= 2	}	;;	adds	RPRE4  = (RPREFETCH + 8) * SIZE, AO4	(p8) FMA	f106 = f9,  f81, f106	shr	I = MM, 3	;;	{ .mmf	cmp.eq	p6, p0 = 0, I	cmp.eq	p16, p0 = r0, r0	(p8) FMA	f106 = f10, f82, f106	}	;;	{ .mfi	adds	I = -1, I	(p8) FMA	f106 = f11, f83, f106	tbit.nz	p13, p0 = MM, 2	}	;;	{ .mib	(p8) STFD	[YST1] = f106, 1 * SIZE	mov	ar.lc = I	(p6) br.cond.dpnt .L25	}	;;	.align 16.L22:	{ .mfi	(p17) LDFPD	f63,  f64  = [AO4], 2 * SIZE	(p17) FMA	f101 = f8,  f33, f101	(p16) tbit.nz.unc	p14, p15 = I, 0	}	{ .mfi	(p16) LDFPD	f100, f103 = [YLD1], 2 * SIZE	(p17) FMA	f104 = f8,  f34, f104	}	;;	{ .mfi	(p16) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p17) FMA	f107 = f8,  f35, f107	(p16) adds	I = -1, I	}	{ .mfi	(p14) PREFETCH	[RPRE1], 16 * SIZE	(p17) FMA	f110 = f8,  f36, f110	}	;;	{ .mfi	(p16) LDFPD	f34,  f35  = [AO1], 2 * SIZE	(p17) FMA	f113 = f8,  f37, f113	}	{ .mfi	(p16) LDFPD	f106, f109 = [YLD1], 2 * SIZE	(p17) FMA	f116 = f8,  f38, f116	}	;;	{ .mfi	(p16) LDFPD	f36,  f37  = [AO1], 2 * SIZE	(p17) FMA	f119 = f8,  f39, f119	}	{ .mfi	(p16) LDFPD	f112, f115 = [YLD1], 2 * SIZE	(p17) FMA	f122 = f8,  f40, f122	}	;;	{ .mfi	(p16) LDFPD	f38,  f39  = [AO1], 2 * SIZE	(p17) FMA	f101 = f9,  f41, f101	}	{ .mfi	(p16) LDFPD	f118, f121 = [YLD1], 2 * SIZE	(p17) FMA	f104 = f9,  f42, f104	}	;;	{ .mmf	(p16) LDFPD	f40,  f41  = [AO2], 2 * SIZE	(p15) PREFETCH	[RPRE2], 16 * SIZE	(p17) FMA	f107 = f9,  f43, f107	}	{ .mfi	(p18) STFD	[YST1] = f16, 1 * SIZE	(p17) FMA	f110 = f9,  f44, f110	}	;;	{ .mfi	(p16) LDFPD	f42,  f43  = [AO2], 2 * SIZE	(p17) FMA	f113 = f9,  f45, f113	}	{ .mfi	(p18) STFD	[YST1] = f17, 1 * SIZE	(p17) FMA	f116 = f9,  f46, f116	}	;;	{ .mfi	(p16) LDFPD	f44,  f45  = [AO2], 2 * SIZE	(p17) FMA	f119 = f9,  f47, f119	}	{ .mfi	(p18) STFD	[YST1] = f18, 1 * SIZE	(p17) FMA	f122 = f9,  f48, f122	}	;;	{ .mfi	(p16) LDFPD	f46,  f47  = [AO2], 2 * SIZE	(p17) FMA	f101 = f10, f49, f101	}	{ .mfi	(p14) lfetch.excl.nt2	[PREB],  16 * SIZE	(p17) FMA	f104 = f10, f50, f104	}	;;	{ .mfi	(p16) LDFPD	f48,  f49  = [AO3], 2 * SIZE	(p17) FMA	f107 = f10, f51, f107	}	{ .mfi	(p14) PREFETCH	[RPRE3], 16 * SIZE	(p17) FMA	f110 = f10, f52, f110	}	;;	{ .mfi	(p16) LDFPD	f50,  f51  = [AO3], 2 * SIZE	(p17) FMA	f113 = f10, f53, f113	}	{ .mfi	(p18) STFD	[YST1] = f19, 1 * SIZE	(p17) FMA	f116 = f10, f54, f116	}	;;	{ .mfi	(p16) LDFPD	f52,  f53  = [AO3], 2 * SIZE	(p17) FMA	f119 = f10, f55, f119	}	{ .mfi	(p18) STFD	[YST1] = f20, 1 * SIZE	(p17) FMA	f122 = f10, f56, f122	}	;;	{ .mfi	(p16) LDFPD	f54,  f55  = [AO3], 2 * SIZE	(p17) FMA	f16 = f11, f57, f101	}	{ .mfi	(p15) PREFETCH	[RPRE4], 16 * SIZE	(p17) FMA	f17 = f11, f58, f104	}	;;	{ .mfi	(p16) LDFPD	f56,  f57  = [AO4], 2 * SIZE	(p17) FMA	f18 = f11, f59, f107	}	{ .mfi	(p18) STFD	[YST1] = f21, 1 * SIZE	(p17) FMA	f19 = f11, f60, f110	}	;;	{ .mfi	(p16) LDFPD	f58,  f59  = [AO4], 2 * SIZE	(p17) FMA	f20 = f11, f61, f113	}	{ .mfi	(p18) STFD	[YST1] = f22, 1 * SIZE	(p17) FMA	f21 = f11, f62, f116	}	;;	{ .mfi	(p16) LDFPD	f60,  f61  = [AO4], 2 * SIZE	(p17) FMA	f22 = f11, f63, f119	}	{ .mfb	(p18) STFD	[YST1] = f23, 1 * SIZE	(p17) FMA	f23 = f11, f64, f122	br.ctop.sptk.few .L22	}	;;	.align 16.L25:	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p18) STFD	[YST1] = f16, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p18) STFD	[YST1] = f17, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f18, 1 * SIZE	}	;;	{ .mmi	(p15) LDFD	f80 = [AO1]	(p15) LDFD	f106 = [YLD1], 1 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f19, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f34,  f35  = [AO2], 2 * SIZE	(p13) LDFPD	f36,  f37  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f20, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f50,  f51  = [AO2], 2 * SIZE	(p13) LDFPD	f52,  f53  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f21, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f66,  f67  = [AO2], 2 * SIZE	(p14) LDFPD	f68,  f69  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f22, 1 * SIZE	}	;;	{ .mmf	(p15) LDFD	f81 = [AO2]	(p15) LDFD	f82 = [AO3]	(p13) FMA	f100 = f8,  f32, f100	}	{ .mfi	(p18) STFD	[YST1] = f23, 1 * SIZE	(p13) FMA	f101 = f8,  f33, f101	}	;;	;;	{ .mfi	(p13) LDFPD	f38,  f39  = [AO4], 2 * SIZE	(p13) FMA	f102 = f8,  f48, f102	}	{ .mfi	(p13) FMA	f103 = f8,  f49, f103	}	;;	{ .mfi	(p13) LDFPD	f54,  f55  = [AO4], 2 * SIZE	(p14) FMA	f104 = f8,  f64, f104	}	{ .mfi	(p14) FMA	f105 = f8,  f65, f105	}	;;	{ .mfi	(p14) LDFPD	f70,  f71  = [AO4], 2 * SIZE	(p15) FMA	f106 = f8,  f80, f106	}	{ .mfi	(p13) FMA	f100 = f9,  f34, f100	}	;;	{ .mfi	(p15) LDFD	f83 = [AO4]	(p13) FMA	f101 = f9,  f35, f101	}	{ .mfi	(p13) FMA	f102 = f9,  f50, f102	}	;;	(p13) FMA	f103 = f9,  f51, f103	(p14) FMA	f104 = f9,  f66, f104	(p14) FMA	f105 = f9,  f67, f105	(p15) FMA	f106 = f9,  f81, f106	;;	(p13) FMA	f100 = f10, f36, f100	(p13) FMA	f101 = f10, f37, f101	(p13) FMA	f102 = f10, f52, f102	(p13) FMA	f103 = f10, f53, f103	(p14) FMA	f104 = f10, f68, f104	(p14) FMA	f105 = f10, f69, f105	(p15) FMA	f106 = f10, f82, f106	;;	(p13) FMA	f100 = f11, f38, f100	(p13) FMA	f101 = f11, f39, f101	;;	(p13) FMA	f102 = f11, f54, f102	(p13) STFD	[YST1] = f100, 1 * SIZE	(p13) FMA	f103 = f11, f55, f103	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	(p14) FMA	f104 = f11, f70, f104	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	(p14) FMA	f105 = f11, f71, f105	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	(p15) FMA	f106 = f11, f83, f106	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	;;	.align 16.L30:	{ .mmi	mov	YLD1 = YY	mov	YST1 = YY	tbit.z	p6, p0 = N, 1	}	;;	{ .mib	mov	AO1 = A	mov	pr.rot= 0	(p6) br.cond.dpnt .L40	}	;;	{ .mmi	LDFD	f8 = [X], INCX	(p8) LDFD	f106 = [YLD1], 1 * SIZE	add	AO2 = LDA, A	}	;;	{ .mmi	LDFD	f9 = [X], INCX	(p8) LDFD	f80 = [AO1], 1 * SIZE	shladd	A   = LDA, 1, A	}	;;	adds	PREB   = RPREFETCH * SIZE, YLD1	FMPY	f8  = ALPHA, f8	mov	ar.ec= 2	adds	RPRE1  = RPREFETCH * SIZE, AO1	FMPY	f9  = ALPHA, f9	shr	I = MM, 3	;;	(p8) LDFD	f81 = [AO2], 1 * SIZE	cmp.eq	p6, p0 = 0, I	;;	(p8) FMA	f106 = f8,  f80, f106	adds	RPRE2  = (RPREFETCH + 8) * SIZE, AO2	tbit.nz	p13, p0 = MM, 2	;;	(p8) FMA	f106 = f9,  f81, f106	cmp.eq	p16, p0 = r0, r0	adds	I = -1, I	;;	{ .mib	(p8) STFD	[YST1] = f106, 1 * SIZE	mov	ar.lc = I	(p6) br.cond.dpnt .L35	}	;;	.align 16.L32:	{ .mfi	(p17) LDFPD	f47,  f48  = [AO2], 2 * SIZE	(p17) FMA	f101 = f8,  f33, f101	(p16) tbit.nz.unc	p14, p15 = I, 0	}	{ .mmf	(p16) LDFPD	f100, f103 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f16, 1 * SIZE	(p17) FMA	f104 = f8,  f34, f104	}	;;	{ .mfi	(p16) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p17) FMA	f107 = f8,  f35, f107	adds	I = -1, I	}	{ .mmf	(p14) PREFETCH	[RPRE1], 16 * SIZE	(p18) STFD	[YST1] = f17, 1 * SIZE	(p17) FMA	f110 = f8,  f36, f110	}	;;	{ .mfi	(p16) LDFPD	f34,  f35  = [AO1], 2 * SIZE	(p17) FMA	f113 = f8,  f37, f113	}	{ .mmf	(p16) LDFPD	f106, f109 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f18, 1 * SIZE	(p17) FMA	f116 = f8,  f38, f116	}	;;	{ .mfi	(p16) LDFPD	f36,  f37  = [AO1], 2 * SIZE	(p17) FMA	f119 = f8,  f39, f119	}	{ .mmf	(p16) LDFPD	f112, f115 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f19, 1 * SIZE	(p17) FMA	f122 = f8,  f40, f122	}	;;	{ .mfi	(p16) LDFPD	f38,  f39  = [AO1], 2 * SIZE	(p17) FMA	f16 = f9,  f41, f101	}	{ .mmf	(p16) LDFPD	f118, f121 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f20, 1 * SIZE	(p17) FMA	f17 = f9,  f42, f104	}	;;	{ .mfi	(p16) LDFPD	f40,  f41  = [AO2], 2 * SIZE	(p17) FMA	f18 = f9,  f43, f107	}	{ .mmf	(p15) PREFETCH	[RPRE2], 16 * SIZE	(p18) STFD	[YST1] = f21, 1 * SIZE	(p17) FMA	f19 = f9,  f44, f110	}	;;	{ .mfi	(p16) LDFPD	f42,  f43  = [AO2], 2 * SIZE	(p17) FMA	f20 = f9,  f45, f113	}	{ .mmf	(p14) PREFETCH	[PREB],  16 * SIZE	(p18) STFD	[YST1] = f22, 1 * SIZE	(p17) FMA	f21 = f9,  f46, f116	}	;;	{ .mfi	(p16) LDFPD	f44,  f45  = [AO2], 2 * SIZE	(p17) FMA	f22 = f9,  f47, f119	}	{ .mfb	(p18) STFD	[YST1] = f23, 1 * SIZE	(p17) FMA	f23 = f9,  f48, f122	br.ctop.sptk.few .L32	}	;;	.align 16.L35:	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p18) STFD	[YST1] = f16, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p18) STFD	[YST1] = f17, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f18, 1 * SIZE	}	;;	{ .mmi	(p15) LDFD	f80 = [AO1]	(p15) LDFD	f106 = [YLD1], 1 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f19, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f34,  f35  = [AO2], 2 * SIZE	(p18) STFD	[YST1] = f20, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f50,  f51  = [AO2], 2 * SIZE	(p18) STFD	[YST1] = f21, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f66,  f67  = [AO2], 2 * SIZE	(p18) STFD	[YST1] = f22, 1 * SIZE	}	;;	{ .mmi	(p15) LDFD	f81 = [AO2]	(p18) STFD	[YST1] = f23, 1 * SIZE	}	;;	(p13) FMA	f100 = f8,  f32, f100	(p13) FMA	f101 = f8,  f33, f101	(p13) FMA	f102 = f8,  f48, f102	(p13) FMA	f103 = f8,  f49, f103	(p14) FMA	f104 = f8,  f64, f104	(p14) FMA	f105 = f8,  f65, f105	(p15) FMA	f106 = f8,  f80, f106	;;	(p13) FMA	f100 = f9,  f34, f100	(p13) FMA	f101 = f9,  f35, f101	(p13) FMA	f102 = f9,  f50, f102	(p13) FMA	f103 = f9,  f51, f103	(p14) FMA	f104 = f9,  f66, f104	(p14) FMA	f105 = f9,  f67, f105	(p15) FMA	f106 = f9,  f81, f106	;;	(p13) STFD	[YST1] = f100, 1 * SIZE	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	;;	.align 16.L40:	{ .mmi	mov	YLD1 = YY	mov	YST1 = YY	tbit.z	p6, p0 = N, 0	}	;;	{ .mib	mov	AO1 = A	mov	pr.rot= 0	(p6) br.cond.dpnt .L990	}	;;	{ .mmi	LDFD	f8 = [X], INCX	(p8) LDFD	f106 = [YLD1], 1 * SIZE	adds	RPRE1  = RPREFETCH * SIZE, AO1	}	;;	{ .mii	(p8) LDFD	f80 = [AO1], 1 * SIZE	adds	PREB   = RPREFETCH * SIZE, YLD1	}	;;	FMPY	f8  = ALPHA, f8	shr	I = MM, 3	;;	(p8) FMA	f106 = f8,  f80, f106	mov	ar.ec= 3	;;	{ .mmi	cmp.eq	p6, p0 = 0, I	cmp.eq	p16, p0 = r0, r0	tbit.nz	p14, p15 = r0, 0	}	;;	{ .mmi	adds	YST2 = 4 * SIZE, YST1	adds	I = -1, I	tbit.nz	p13, p0 = MM, 2	}	;;	{ .mmi	(p8) STFD	[YST1] = f106, 1 * SIZE	(p8) adds	YST2 = 1 * SIZE, YST2	}	{ .mib	mov	ar.lc = I	(p6) br.cond.dpnt .L145	}	;;	.align 16.L42:	{ .mmf	(p19) STFD	[YST1] = f16, 1 * SIZE	(p19) STFD	[YST2] = f20, 1 * SIZE	(p18) FMA	f16 = f8,  f34, f102	}	{ .mmf	(p16) LDFPD	f32,  f35  = [AO1], 2 * SIZE	(p16) LDFPD	f100, f103 = [YLD1], 2 * SIZE	(p18) FMA	f20 = f8,  f46, f114	}	;;	{ .mmf	(p19) STFD	[YST1] = f17, 1 * SIZE	(p19) STFD	[YST2] = f21, 1 * SIZE	(p18) FMA	f17 = f8,  f37, f105	}	{ .mmf	(p16) LDFPD	f38,  f41  = [AO1], 2 * SIZE	(p16) LDFPD	f106, f109 = [YLD1], 2 * SIZE	(p18) FMA	f21 = f8,  f49, f117	}	;;	{ .mmf	(p19) STFD	[YST1] = f18, 1 * SIZE	(p19) STFD	[YST2] = f22, 1 * SIZE	(p18) FMA	f18 = f8,  f40, f108	}	{ .mmf	(p16) LDFPD	f44,  f47  = [AO1], 2 * SIZE	(p16) LDFPD	f112, f115 = [YLD1], 2 * SIZE	(p18) FMA	f22 = f8,  f52, f120	}	;;	{ .mmf	(p19) STFD	[YST1] = f19, 5 * SIZE	(p19) STFD	[YST2] = f23, 5 * SIZE	(p18) FMA	f19 = f8,  f43, f111	}	{ .mmf	(p16) LDFPD	f50,  f53  = [AO1], 2 * SIZE	(p16) LDFPD	f118, f121 = [YLD1], 2 * SIZE	(p18) FMA	f23 = f8,  f55, f123	}	;;	{ .mmi	(p14) PREFETCH	[RPRE1], 16 * SIZE	(p14) PREFETCH	[PREB],  16 * SIZE	(p16) tbit.nz.unc	p14, p15 = I, 0	}	{ .mib	nop	__LINE__	(p16) adds	I = -1, I	br.ctop.sptk.few .L42	}	;;	.align 16.L45:	{ .mmi	(p19) STFD	[YST1] = f16, 1 * SIZE	(p19) STFD	[YST2] = f20, 1 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f17, 1 * SIZE	(p19) STFD	[YST2] = f21, 1 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f18, 1 * SIZE	(p19) STFD	[YST2] = f22, 1 * SIZE	}	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f19, 5 * SIZE	(p19) STFD	[YST2] = f23, 5 * SIZE	}	{ .mmi	(p15) LDFD	f80 = [AO1]	(p15) LDFD	f106 = [YLD1], 1 * SIZE	}	;;	(p13) FMA	f100 = f8,  f32, f100	(p13) FMA	f101 = f8,  f33, f101	(p13) FMA	f102 = f8,  f48, f102	(p13) FMA	f103 = f8,  f49, f103	;;	(p13) STFD	[YST1] = f100, 1 * SIZE	(p14) FMA	f104 = f8,  f64, f104	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	(p14) FMA	f105 = f8,  f65, f105	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	(p15) FMA	f106 = f8,  f80, f106	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	br    .L990	;;	.align 16.L100:	shr	J   = N, 3	;;	cmp.eq	p6, p0 = r0, J	(p6) br.cond.dpnt .L120	;;	.align 16.L111:	mov	YLD1 = YY	mov	YST1 = YY	;;	LDFD	f8 = [X], INCX	;;	LDFD	f9 = [X], INCX	;;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -