⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sgemv_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
	(p14) lfetch.excl.nt2	[PREB],  16 * SIZE	(p17) FMA	f104 = f10, f50, f104	}	;;	{ .mfi	(p16) LDFPD	f48,  f49  = [AO3], 2 * SIZE	(p17) FMA	f107 = f10, f51, f107	}	{ .mfi	(p14) PREFETCH	[RPRE3], 16 * SIZE	(p17) FMA	f110 = f10, f52, f110	}	;;	{ .mfi	(p16) LDFPD	f50,  f51  = [AO3], 2 * SIZE	(p17) FMA	f113 = f10, f53, f113	}	{ .mfi	(p18) STFD	[YST1] = f19, 1 * SIZE	(p17) FMA	f116 = f10, f54, f116	}	;;	{ .mfi	(p16) LDFPD	f52,  f53  = [AO3], 2 * SIZE	(p17) FMA	f119 = f10, f55, f119	}	{ .mfi	(p18) STFD	[YST1] = f20, 1 * SIZE	(p17) FMA	f122 = f10, f56, f122	}	;;	{ .mfi	(p16) LDFPD	f54,  f55  = [AO3], 2 * SIZE	(p17) FMA	f16 = f11, f57, f101	}	{ .mmf	(p15) PREFETCH	[RPRE4], 16 * SIZE	(p16) LDFD	f56  = [AO4], 1 * SIZE	(p17) FMA	f17 = f11, f58, f104	}	;;	{ .mfi	(p16) LDFPD	f57,  f58  = [AO4], 2 * SIZE	(p17) FMA	f18 = f11, f59, f107	}	{ .mfi	(p18) STFD	[YST1] = f21, 1 * SIZE	(p17) FMA	f19 = f11, f60, f110	}	;;	{ .mfi	(p16) LDFPD	f59,  f60  = [AO4], 2 * SIZE	(p17) FMA	f20 = f11, f61, f113	}	{ .mfi	(p18) STFD	[YST1] = f22, 1 * SIZE	(p17) FMA	f21 = f11, f62, f116	}	;;	{ .mfi	(p16) LDFPD	f61,  f62  = [AO4], 2 * SIZE	(p17) FMA	f22 = f11, f63, f119	}	{ .mfb	(p18) STFD	[YST1] = f23, 1 * SIZE	(p17) FMA	f23 = f11, f64, f122	br.ctop.sptk.few .L122	}	;;	.align 16.L125:	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p18) STFD	[YST1] = f16, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p18) STFD	[YST1] = f17, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f18, 1 * SIZE	}	;;	{ .mmi	(p18) STFD	[YST1] = f19, 1 * SIZE	(p15) LDFD	f80 = [AO1]	}	{ .mmi	(p15) LDFD	f106 = [YLD1], 1 * SIZE	(p13) LDFD	f34  = [AO2], 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f35,  f50  = [AO2], 2 * SIZE	(p13) LDFPD	f36,  f37  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f20, 1 * SIZE	}	;;	{ .mmi	(p13) LDFD	f51  = [AO2], 1 * SIZE	(p13) LDFPD	f52,  f53  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f21, 1 * SIZE	}	;;	{ .mmi	(p14) LDFD	f66  = [AO2], 1 * SIZE	(p14) LDFPD	f68,  f69  = [AO3], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f22, 1 * SIZE	}	;;	{ .mmf	(p18) STFD	[YST1] = f23, 1 * SIZE	(p14) LDFD	f67  = [AO2], 1 * SIZE	(p13) FMA	f100 = f8,  f32, f100	}	{ .mmf	(p15) LDFD	f82 = [AO3]	(p13) LDFD	f38  = [AO4], 1 * SIZE	(p13) FMA	f101 = f8,  f33, f101	}	;;	;;	{ .mmf	(p13) LDFPD	f39,  f54  = [AO4], 2 * SIZE	(p15) LDFD	f81 = [AO2]	(p13) FMA	f102 = f8,  f48, f102	}	{ .mfi	(p13) FMA	f103 = f8,  f49, f103	}	;;	{ .mfi	(p13) LDFD	f55  = [AO4], 1 * SIZE	(p14) FMA	f104 = f8,  f64, f104	}	{ .mfi	(p14) FMA	f105 = f8,  f65, f105	}	;;	{ .mfi	(p14) LDFD	f70  = [AO4], 1 * SIZE	(p15) FMA	f106 = f8,  f80, f106	}	{ .mfi	(p13) FMA	f100 = f9,  f34, f100	}	;;	{ .mfi	(p14) LDFD	f71  = [AO4], 1 * SIZE	(p13) FMA	f101 = f9,  f35, f101	}	{ .mfi	(p13) FMA	f102 = f9,  f50, f102	}	;;	(p15) LDFD	f83 = [AO4]	(p13) FMA	f103 = f9,  f51, f103	(p14) FMA	f104 = f9,  f66, f104	(p14) FMA	f105 = f9,  f67, f105	(p15) FMA	f106 = f9,  f81, f106	;;	(p13) FMA	f100 = f10, f36, f100	(p13) FMA	f101 = f10, f37, f101	(p13) FMA	f102 = f10, f52, f102	(p13) FMA	f103 = f10, f53, f103	(p14) FMA	f104 = f10, f68, f104	(p14) FMA	f105 = f10, f69, f105	(p15) FMA	f106 = f10, f82, f106	;;	(p13) FMA	f100 = f11, f38, f100	(p13) FMA	f101 = f11, f39, f101	;;	(p13) FMA	f102 = f11, f54, f102	(p13) STFD	[YST1] = f100, 1 * SIZE	(p13) FMA	f103 = f11, f55, f103	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	(p14) FMA	f104 = f11, f70, f104	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	(p14) FMA	f105 = f11, f71, f105	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	(p15) FMA	f106 = f11, f83, f106	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	;;	.align 16.L130:	{ .mmi	mov	YLD1 = YY	mov	YST1 = YY	tbit.z	p6, p0 = N, 1	}	;;	{ .mib	mov	AO1 = A	mov	pr.rot= 0	(p6) br.cond.dpnt .L140	}	;;	{ .mmi	LDFD	f8 = [X], INCX	(p8) LDFD	f106 = [YLD1], 1 * SIZE	add	AO2 = LDA, A	}	;;	{ .mmi	LDFD	f9 = [X], INCX	(p8) LDFD	f80 = [AO1], 1 * SIZE	shladd	A   = LDA, 1, A	}	;;	adds	PREB   = RPREFETCH * SIZE, YLD1	FMPY	f8  = ALPHA, f8	mov	ar.ec= 2	adds	RPRE1  = RPREFETCH * SIZE, AO1	FMPY	f9  = ALPHA, f9	shr	I = MM, 3	;;	(p8) LDFD	f81 = [AO2], 1 * SIZE	cmp.eq	p6, p0 = 0, I	;;	(p8) FMA	f106 = f8,  f80, f106	adds	RPRE2  = (RPREFETCH + 8) * SIZE, AO2	tbit.nz	p13, p0 = MM, 2	;;	(p8) FMA	f106 = f9,  f81, f106	cmp.eq	p16, p0 = r0, r0	adds	I = -1, I	;;	{ .mib	(p8) STFD	[YST1] = f106, 1 * SIZE	mov	ar.lc = I	(p6) br.cond.dpnt .L135	}	;;	.align 16.L132:	{ .mfi	(p17) LDFD	f48  = [AO2], 1 * SIZE	(p17) FMA	f101 = f8,  f33, f101	(p16) tbit.nz.unc	p14, p15 = I, 0	}	{ .mmf	(p16) LDFPD	f100, f103 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f16, 1 * SIZE	(p17) FMA	f104 = f8,  f34, f104	}	;;	{ .mfi	(p16) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p17) FMA	f107 = f8,  f35, f107	adds	I = -1, I	}	{ .mmf	(p14) PREFETCH	[RPRE1], 16 * SIZE	(p18) STFD	[YST1] = f17, 1 * SIZE	(p17) FMA	f110 = f8,  f36, f110	}	;;	{ .mfi	(p16) LDFPD	f34,  f35  = [AO1], 2 * SIZE	(p17) FMA	f113 = f8,  f37, f113	}	{ .mmf	(p16) LDFPD	f106, f109 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f18, 1 * SIZE	(p17) FMA	f116 = f8,  f38, f116	}	;;	{ .mfi	(p16) LDFPD	f36,  f37  = [AO1], 2 * SIZE	(p17) FMA	f119 = f8,  f39, f119	}	{ .mmf	(p16) LDFPD	f112, f115 = [YLD1], 2 * SIZE	(p18) STFD	[YST1] = f19, 1 * SIZE	(p17) FMA	f122 = f8,  f40, f122	}	;;	{ .mmf	(p16) LDFPD	f38,  f39  = [AO1], 2 * SIZE	(p16) LDFPD	f118, f121 = [YLD1], 2 * SIZE	(p17) FMA	f16 = f9,  f41, f101	}	{ .mmf	(p18) STFD	[YST1] = f20, 1 * SIZE	(p16) LDFD	f40  = [AO2], 1 * SIZE	(p17) FMA	f17 = f9,  f42, f104	}	;;	{ .mfi	(p16) LDFPD	f41,  f42  = [AO2], 2 * SIZE	(p17) FMA	f18 = f9,  f43, f107	}	{ .mmf	(p15) PREFETCH	[RPRE2], 16 * SIZE	(p18) STFD	[YST1] = f21, 1 * SIZE	(p17) FMA	f19 = f9,  f44, f110	}	;;	{ .mfi	(p16) LDFPD	f43,  f44  = [AO2], 2 * SIZE	(p17) FMA	f20 = f9,  f45, f113	}	{ .mmf	(p14) PREFETCH	[PREB],  16 * SIZE	(p18) STFD	[YST1] = f22, 1 * SIZE	(p17) FMA	f21 = f9,  f46, f116	}	;;	{ .mfi	(p16) LDFPD	f45,  f46  = [AO2], 2 * SIZE	(p17) FMA	f22 = f9,  f47, f119	}	{ .mfb	(p18) STFD	[YST1] = f23, 1 * SIZE	(p17) FMA	f23 = f9,  f48, f122	br.ctop.sptk.few .L132	}	;;	.align 16.L135:	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p18) STFD	[YST1] = f16, 1 * SIZE	}	;;	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p18) STFD	[YST1] = f17, 1 * SIZE	}	;;	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f18, 1 * SIZE	}	;;	{ .mmi	(p15) LDFD	f80 = [AO1]	(p15) LDFD	f106 = [YLD1], 1 * SIZE	}	{ .mmi	(p18) STFD	[YST1] = f19, 1 * SIZE	}	;;	{ .mmi	(p13) LDFD	f34 = [AO2], 1 * SIZE	(p18) STFD	[YST1] = f20, 1 * SIZE	}	;;	{ .mmi	(p13) LDFD	f35  = [AO2], 1 * SIZE	(p18) STFD	[YST1] = f21, 1 * SIZE	}	;;	{ .mmi	(p13) LDFD	f50  = [AO2], 1 * SIZE	(p18) STFD	[YST1] = f22, 1 * SIZE	}	;;	{ .mmi	(p13) LDFD	f51  = [AO2], 1 * SIZE	(p18) STFD	[YST1] = f23, 1 * SIZE	}	;;	(p14) LDFD	f66  = [AO2], 1 * SIZE	(p13) FMA	f100 = f8,  f32, f100	;;	(p14) LDFD	f67  = [AO2], 1 * SIZE	(p13) FMA	f101 = f8,  f33, f101	;;	(p15) LDFD	f81 = [AO2]	(p13) FMA	f102 = f8,  f48, f102	(p13) FMA	f103 = f8,  f49, f103	(p14) FMA	f104 = f8,  f64, f104	(p14) FMA	f105 = f8,  f65, f105	(p15) FMA	f106 = f8,  f80, f106	;;	(p13) FMA	f100 = f9,  f34, f100	(p13) FMA	f101 = f9,  f35, f101	(p13) FMA	f102 = f9,  f50, f102	(p13) FMA	f103 = f9,  f51, f103	(p14) FMA	f104 = f9,  f66, f104	(p14) FMA	f105 = f9,  f67, f105	(p15) FMA	f106 = f9,  f81, f106	;;	(p13) STFD	[YST1] = f100, 1 * SIZE	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	;;	.align 16.L140:	{ .mmi	mov	YLD1 = YY	mov	YST1 = YY	tbit.z	p6, p0 = N, 0	}	;;	{ .mib	mov	AO1 = A	mov	pr.rot= 0	(p6) br.cond.dpnt .L990	}	;;	{ .mmi	LDFD	f8 = [X], INCX	(p8) LDFD	f106 = [YLD1], 1 * SIZE	adds	RPRE1  = RPREFETCH * SIZE, AO1	}	;;	{ .mmi	(p8) LDFD	f80 = [AO1], 1 * SIZE	adds	PREB   = RPREFETCH * SIZE, YLD1	}	;;	FMPY	f8  = ALPHA, f8	shr	I = MM, 3	;;	(p8) FMA	f106 = f8,  f80, f106	mov	ar.ec= 3	;;	{ .mmi	cmp.eq	p6, p0 = 0, I	cmp.eq	p16, p0 = r0, r0	tbit.nz	p14, p15 = r0, 0	}	;;	{ .mmi	adds	YST2 = 4 * SIZE, YST1	adds	I = -1, I	tbit.nz	p13, p0 = MM, 2	}	;;	{ .mmi	(p8) STFD	[YST1] = f106, 1 * SIZE	(p8) adds	YST2 = 1 * SIZE, YST2	}	{ .mib	mov	ar.lc = I	(p6) br.cond.dpnt .L145	}	;;	.align 16.L142:	{ .mmf	(p19) STFD	[YST1] = f16, 1 * SIZE	(p19) STFD	[YST2] = f20, 1 * SIZE	(p18) FMA	f16 = f8,  f34, f102	}	{ .mmf	(p16) LDFPD	f32,  f35  = [AO1], 2 * SIZE	(p16) LDFPD	f100, f103 = [YLD1], 2 * SIZE	(p18) FMA	f20 = f8,  f46, f114	}	;;	{ .mmf	(p19) STFD	[YST1] = f17, 1 * SIZE	(p19) STFD	[YST2] = f21, 1 * SIZE	(p18) FMA	f17 = f8,  f37, f105	}	{ .mmf	(p16) LDFPD	f38,  f41  = [AO1], 2 * SIZE	(p16) LDFPD	f106, f109 = [YLD1], 2 * SIZE	(p18) FMA	f21 = f8,  f49, f117	}	;;	{ .mmf	(p19) STFD	[YST1] = f18, 1 * SIZE	(p19) STFD	[YST2] = f22, 1 * SIZE	(p18) FMA	f18 = f8,  f40, f108	}	{ .mmf	(p16) LDFPD	f44,  f47  = [AO1], 2 * SIZE	(p16) LDFPD	f112, f115 = [YLD1], 2 * SIZE	(p18) FMA	f22 = f8,  f52, f120	}	;;	{ .mmf	(p19) STFD	[YST1] = f19, 5 * SIZE	(p19) STFD	[YST2] = f23, 5 * SIZE	(p18) FMA	f19 = f8,  f43, f111	}	{ .mmf	(p16) LDFPD	f50,  f53  = [AO1], 2 * SIZE	(p16) LDFPD	f118, f121 = [YLD1], 2 * SIZE	(p18) FMA	f23 = f8,  f55, f123	}	;;	{ .mmi	(p14) PREFETCH	[RPRE1], 16 * SIZE	(p14) PREFETCH	[PREB],  16 * SIZE	(p16) tbit.nz.unc	p14, p15 = I, 0	}	{ .mib	nop	__LINE__	(p16) adds	I = -1, I	br.ctop.sptk.few .L142	}	;;	.align 16.L145:	{ .mmi	(p19) STFD	[YST1] = f16, 1 * SIZE	(p19) STFD	[YST2] = f20, 1 * SIZE	tbit.nz	p14, p0 = MM, 1	}	{ .mmi	(p13) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p13) LDFPD	f100, f101 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f17, 1 * SIZE	(p19) STFD	[YST2] = f21, 1 * SIZE	tbit.nz	p15, p0 = MM, 0	}	{ .mmi	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f102, f103 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f18, 1 * SIZE	(p19) STFD	[YST2] = f22, 1 * SIZE	}	{ .mmi	(p14) LDFPD	f64,  f65  = [AO1], 2 * SIZE	(p14) LDFPD	f104, f105 = [YLD1], 2 * SIZE	}	;;	{ .mmi	(p19) STFD	[YST1] = f19, 5 * SIZE	(p19) STFD	[YST2] = f23, 5 * SIZE	}	{ .mmi	(p15) LDFD	f80 = [AO1]	(p15) LDFD	f106 = [YLD1], 1 * SIZE	}	;;	(p13) FMA	f100 = f8,  f32, f100	(p13) FMA	f101 = f8,  f33, f101	(p13) FMA	f102 = f8,  f48, f102	(p13) FMA	f103 = f8,  f49, f103	(p14) FMA	f104 = f8,  f64, f104	(p14) FMA	f105 = f8,  f65, f105	(p15) FMA	f106 = f8,  f80, f106	;;	(p13) STFD	[YST1] = f100, 1 * SIZE	;;	(p13) STFD	[YST1] = f101, 1 * SIZE	;;	(p13) STFD	[YST1] = f102, 1 * SIZE	;;	(p13) STFD	[YST1] = f103, 1 * SIZE	;;	(p14) STFD	[YST1] = f104, 1 * SIZE	;;	(p14) STFD	[YST1] = f105, 1 * SIZE	;;	(p15) STFD	[YST1] = f106, 1 * SIZE	;;	.align 16.L990:	{ .mmi	mov	YLD1 = YY	mov	YST1 = Y	mov	pr.rot= 0	}	{ .mib	mov	YST2 = Y	shr	J = M, 3	(p10) br.cond.dptk .L999	}	;;	{ .mmi	cmp.eq	p6, p0 = r0, J	adds	J = -1, J	mov	ar.ec = 4	}	{ .mmi	cmp.eq	p16, p0 = r0, r0	nop	__LINE__	tbit.nz	p13, p0 = M, 2	}	;;	{ .mib	nop	__LINE__	mov	ar.lc = J	(p6) br.cond.dpnt .L995	}	;;.L992:	{ .mfi	(p19)	STFD	[YST2] = f35	(p18)	FADD	f34 = f34, f66	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f64 = [YLD1], 1 * SIZE	(p16)	LDFD	f32 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f39	(p18)	FADD	f38 = f38, f70	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f36 = [YST1], INCY	(p16)	LDFD	f68 = [YLD1], 1 * SIZE	}	;;	{ .mfi	(p19)	STFD	[YST2] = f43	(p18)	FADD	f42 = f42, f74	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f72 = [YLD1], 1 * SIZE	(p16)	LDFD	f40 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f47	(p18)	FADD	f46 = f46, f78	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f76 = [YLD1], 1 * SIZE	(p16)	LDFD	f44 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f51	(p18)	FADD	f50 = f50, f82	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f80 = [YLD1], 1 * SIZE	(p16)	LDFD	f48 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f55	(p18)	FADD	f54 = f54, f86	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f84 = [YLD1], 1 * SIZE	(p16)	LDFD	f52 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f59	(p18)	FADD	f58 = f58, f90	(p19)	add YST2 = YST2, INCY	}	{ .mmi	(p16)	LDFD	f88 = [YLD1], 1 * SIZE	(p16)	LDFD	f56 = [YST1], INCY	}	;;	{ .mfi	(p19)	STFD	[YST2] = f63	(p18)	FADD	f62 = f62, f94	(p19)	add YST2 = YST2, INCY	}	{ .mmb	(p16)	LDFD	f92 = [YLD1], 1 * SIZE	(p16)	LDFD	f60 = [YST1], INCY	br.ctop.sptk.few .L992	}	;;.L995:	(p13)	LDFD	f32 = [YST1], INCY	(p13)	LDFD	f40 = [YLD1], 1 * SIZE	tbit.nz	p14, p0 = M, 1	;;	(p13)	LDFD	f33 = [YST1], INCY	(p13)	LDFD	f41 = [YLD1], 1 * SIZE	tbit.nz	p15, p0 = M, 0	;;	(p13)	LDFD	f34 = [YST1], INCY	(p13)	LDFD	f42 = [YLD1], 1 * SIZE	;;	(p13)	LDFD	f35 = [YST1], INCY	(p13)	LDFD	f43 = [YLD1], 1 * SIZE	;;	(p14)	LDFD	f36 = [YST1], INCY	(p14)	LDFD	f44 = [YLD1], 1 * SIZE	;;	(p14)	LDFD	f37 = [YST1], INCY	(p14)	LDFD	f45 = [YLD1], 1 * SIZE	;;	(p15)	LDFD	f38 = [YST1], INCY	(p15)	LDFD	f46 = [YLD1], 1 * SIZE	;;	(p13)	FADD	f32 = f32, f40	(p13)	FADD	f33 = f33, f41	(p13)	FADD	f34 = f34, f42	(p13)	FADD	f35 = f35, f43	(p14)	FADD	f36 = f36, f44	(p14)	FADD	f37 = f37, f45	(p15)	FADD	f38 = f38, f46	;;	(p13)	STFD	[YST2] = f32	(p13)	add YST2 = YST2, INCY	;;	(p13)	STFD	[YST2] = f33	(p13)	add YST2 = YST2, INCY	;;	(p13)	STFD	[YST2] = f34	(p13)	add YST2 = YST2, INCY	;;	(p13)	STFD	[YST2] = f35	(p13)	add YST2 = YST2, INCY	;;	(p14)	STFD	[YST2] = f36	(p14)	add YST2 = YST2, INCY	;;	(p14)	STFD	[YST2] = f37	(p14)	add YST2 = YST2, INCY	;;	(p15)	STFD	[YST2] = f38	;;.L999:	mov	r8 = r0	adds	r9 = 1 * 16, SP	;;	ldf.fill  f16 = [SP], 32	ldf.fill  f17 = [r9], 32	mov	 ar.lc = ARLC	;;		ldf.fill  f18 = [SP], 32	ldf.fill  f19 = [r9], 32	mov pr    = PR, -1	;;		ldf.fill  f20 = [SP], 32	ldf.fill  f21 = [r9], 32	mov	ar.pfs = ARPFS	;;		ldf.fill  f22 = [SP], 32	ldf.fill  f23 = [r9]	br.ret.sptk.many b0	;;	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -