⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_t.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
	mov	f13 = f0	}	{ .mmf	(p8) LDFD f34 = [AO3], SIZE	(p8) LDFD f35 = [AO4], SIZE	mov	f15 = f0	}	;;	{ .mmi	adds	RPRE1  = RPREFETCH * SIZE, AO1	adds	RPRE2  = (RPREFETCH + 8) * SIZE, AO2	mov	ar.ec= 2	}	{ .mmi	cmp.eq	p16, p0 = r0, r0	add	I = I, I	adds	AO21 = 7 * SIZE, AO2	}	;;	{ .mmf	adds	WPRE =  4 * SIZE, CO	adds	PREB  = RPREFETCH * SIZE, BO	(p8) FMPY	f8  = f40, f32	}	{ .mmf	adds	RPRE3  = RPREFETCH * SIZE, AO3	adds	I = -1, I	(p8) FMPY	f10 = f40, f33	}	;;	{ .mfi	adds	AO41 = 7 * SIZE, AO4	(p8) FMPY	f12 = f40, f34	mov	ar.lc = I	}	{ .mfb	adds	RPRE4  = (RPREFETCH + 8) * SIZE, AO4	(p8) FMPY	f14 = f40, f35	(p6) br.cond.dpnt .L125	}	;;	.align 16.L122:	{ .mmf	(p17) LDFPD	f72, f87 = [AO4]	(p17) LDFPD	f110, f111 = [BO], 2 * SIZE	(p17) FMA	f8  = f104, f33, f8	}	{ .mfi	(p17) adds	AO4 = 3 * SIZE, AO4	(p17) FMA	f9  = f105, f34, f9	(p16) tbit.nz.unc p14, p15 = I, 0	}	;;	{ .mmf	(p14) PREFETCH [RPRE1], 16 * SIZE	(p16) LDFPD	f32, f33 = [AO1], 2 * SIZE	(p17) FMA	f10 = f104, f35, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f105, f36, f11	}	;;	{ .mmf	(p15) PREFETCH [RPRE2], 16 * SIZE	(p16) LDFD	f34      = [AO2], 1 * SIZE	(p17) FMA	f12 = f104, f37, f12	}	{ .mmf	(p17) LDFD	f84      = [AO21], 8 * SIZE	nop	__LINE__	(p17) FMA	f13 = f105, f38, f13	}	;;	{ .mmf	(p14) PREFETCH [RPRE3], 16 * SIZE	(p16) LDFPD	f36, f37 = [AO3], 2 * SIZE	(p17) FMA	f14 = f104, f39, f14	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f15 = f105, f40, f15	}	;;	{ .mmf	(p15) PREFETCH [RPRE4], 16 * SIZE	(p16) LDFD	f38      = [AO4], 1 * SIZE	(p17) FMA	f8  = f106, f49, f8	}	{ .mmf	(p17) LDFD	f88      = [AO41], 8 * SIZE	nop	__LINE__	(p17) FMA	f9  = f107, f50, f9	}	;;	{ .mmf	(p14) PREFETCH [PREB], 16 * SIZE	(p16) LDFPD	f48, f49 = [AO1], 2 * SIZE	(p17) FMA	f10 = f106, f51, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f107, f52, f11	}	;;	{ .mmf	(p16) LDFPD	f35, f50 = [AO2], 2 * SIZE	(p16) LDFPD	f103, f104 = [BO], 2 * SIZE	(p17) FMA	f12 = f106, f53, f12	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f13 = f107, f54, f13	}	;;	{ .mmf	(p16) LDFPD	f52, f53 = [AO3], 2 * SIZE	nop	__LINE__	(p17) FMA	f14 = f106, f55, f14	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f15 = f107, f56, f15	}	;;	{ .mmf	(p16) LDFPD	f39, f54 = [AO4], 2 * SIZE	(p16) LDFPD	f105, f106 = [BO], 2 * SIZE	(p17) FMA	f8  = f108, f65, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f109, f66, f9	}	;;	{ .mmf	(p16) LDFPD	f64, f65 = [AO1], 2 * SIZE	nop	__LINE__	(p17) FMA	f10 = f108, f67, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f109, f68, f11	}	;;	{ .mmf	(p16) LDFPD	f51, f66 = [AO2], 2 * SIZE	nop	__LINE__	(p17) FMA	f12 = f108, f69, f12	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f13 = f109, f70, f13	}	;;	{ .mmf	(p16) LDFPD	f68, f69 = [AO3], 2 * SIZE	nop	__LINE__	(p17) FMA	f14 = f108, f71, f14	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f15 = f109, f72, f15	}	;;	{ .mmf	(p16) LDFPD	f55, f70 = [AO4], 2 * SIZE	(p16) LDFPD	f107, f108 = [BO], 2 * SIZE	(p17) FMA	f8  = f110, f81, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f111, f82, f9	}	;;	{ .mmf	(p16) LDFPD	f80, f81 = [AO1], 2 * SIZE	nop	__LINE__	(p17) FMA	f10 = f110, f83, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f111, f84, f11	}	;;	{ .mmf	(p16) LDFPD	f67, f82 = [AO2]	nop	__LINE__	(p17) FMA	f12 = f110, f85, f12	}	{ .mmf	nop	__LINE__	(p16) adds	AO2 = 3 * SIZE, AO2	(p17) FMA	f13 = f111, f86, f13	}	;;	{ .mmf	(p16) LDFPD	f84, f85 = [AO3], 2 * SIZE	nop	__LINE__	(p17) FMA	f14 = f110, f87, f14	}	{ .mfb	adds	I = -1, I	(p17) FMA	f15 = f111, f88, f15	br.ctop.sptk.few .L122	}	;;	.align 16.L125:	and	I = 15, MIN_M	mov	pr.rot= 0	;;	cmp.eq	p6,  p0 = 0, I	cmp.eq	p16, p15 = r0, r0	;;	adds	I = 1, I	adds	AO21 = 1 * SIZE, AO2	adds	AO41 = 1 * SIZE, AO4	;;	shr	I = I, 1	;;	adds	I = -1, I	;;	mov	ar.lc = I	mov	ar.ec= 3	and	I = 15, MIN_M	(p6) br.cond.dpnt .L128	;;	.align 16.L126:	{ .mmf	(p16) LDFPD	f104, f107 = [BO], 2 * SIZE	(p16) LDFPD	f32,  f35  = [AO1], 2 * SIZE	(p18) FMA	f8  = f106, f34, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p15) FMA	f9  = f109, f37, f9	}	;;	{ .mmf	(p17) LDFD	f42        = [AO21], 2 * SIZE	(p16) LDFD	f38        = [AO2], 2 * SIZE	(p18) FMA	f10 = f106, f40, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p15) FMA	f11 = f109, f43, f11	}	;;	{ .mmf	(p16) LDFPD	f44,  f47  = [AO3], 2 * SIZE	nop	__LINE__	(p18) FMA	f12 = f106, f46, f12	}	{ .mmf	nop	__LINE__	(p17) adds	I = -2, I	(p15) FMA	f13 = f109, f49, f13	}	;;	{ .mmf	(p17) LDFD	f54        = [AO41], 2 * SIZE	(p16) LDFD	f50        = [AO4], 2 * SIZE	(p15) FMA	f15 = f109, f55, f15	}	{ .mfb	(p17) cmp.ne.unc p15, p0 = -1, I	(p18) FMA	f14 = f106, f52, f14	br.ctop.sptk.few .L126	}	;;.L128:	{ .mmf	mov	AO1 = CO	LDFD	f32 = [CO], INCY	FADD	f8  = f8,  f9	}	;;	{ .mmf	LDFD	f33 = [CO], INCY	nop	__LINE__	FADD	f10 = f10, f11	}	;;	{ .mmf	LDFD	f34 = [CO], INCY	nop	__LINE__	FADD	f12 = f12, f13	}	;;	{ .mmf	LDFD	f35 = [CO], INCY	nop	__LINE__	FADD	f14 = f14, f15	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f32 = ALPHA, f8,  f32	}	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f33 = ALPHA, f10, f33	}	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f34 = ALPHA, f12, f34	}	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f35 = ALPHA, f14, f35	}	;;	{ .mmf	STFD [AO1] = f32	add	AO1 = AO1, INCY	}	;;	{ .mmf	STFD [AO1] = f33	add	AO1 = AO1, INCY	}	;;	{ .mmf	STFD [AO1] = f34	add	AO1 = AO1, INCY	}	;;	{ .mmf	STFD [AO1] = f35	add	AO1 = AO1, INCY	}	;;	.align 16.L130:	{ .mfi	mov	AO1 = A	mov	f8  = f0	mov	pr.rot= 0	}	{ .mfi	add	AO2 = LDA, A	mov	f10 = f0	tbit.z	p6, p0  = N, 1	}	;;	{ .mfi	mov	BO  = BUFFER	mov	f12 = f0	shr	I = MIN_M, 4	}	{ .mfb	adds	WPRE =  4 * SIZE, CO	mov	f14 = f0	(p6) br.cond.dpnt .L140	}	;;	{ .mmf	(p8) LDFD f32 = [AO1], SIZE	(p8) LDFD f33 = [AO2], SIZE	mov	f9  = f0	}	{ .mfi	shladd	A   = LDA, 1, A	mov	f11 = f0	mov	ar.ec= 2	}	;;	{ .mmf	(p8) LDFD f40 = [BO], 2 * SIZE	cmp.eq	p6, p0 = 0, I	mov	f13 = f0	}	{ .mmf	adds	RPRE1  = RPREFETCH * SIZE, AO1	add	I = I, I	mov	f15 = f0	}	;;	{ .mmi	cmp.eq	p16, p0 = r0, r0	adds	RPRE2  = (RPREFETCH + 8) * SIZE, AO2	adds	I = -1, I	}	;;	{ .mfi	adds	AO21 = 7 * SIZE, AO2	(p8) FMPY	f8  = f40, f32	mov	ar.lc = I	}	{ .mfb	adds	PREB  = RPREFETCH * SIZE, BO	(p8) FMPY	f10 = f40, f33	(p6) br.cond.dpnt .L135	}	;;	.align 16.L132:	{ .mmf	(p17) LDFPD	f68, f83 = [AO2]	(p17) LDFPD	f110, f111 = [BO], 2 * SIZE	(p17) FMA	f8  = f104, f33, f8	}	{ .mfi	(p17) adds	AO2 = 3 * SIZE, AO2	(p17) FMA	f9  = f105, f34, f9	(p16) tbit.nz.unc p14, p15 = I, 0	}	;;	{ .mmf	(p14) PREFETCH [RPRE1], 16 * SIZE	(p16) LDFPD	f32, f33 = [AO1], 2 * SIZE	(p17) FMA	f10 = f104, f35, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f105, f36, f11	}	;;	{ .mmf	(p15) PREFETCH [RPRE2], 16 * SIZE	(p16) LDFD	f34      = [AO2], 1 * SIZE	(p17) FMA	f8  = f106, f49, f8	}	{ .mmf	(p17) LDFD	f84      = [AO21], 8 * SIZE	nop	__LINE__	(p17) FMA	f9  = f107, f50, f9	}	;;	{ .mmf	(p14) PREFETCH [PREB], 16 * SIZE	(p16) LDFPD	f48, f49 = [AO1], 2 * SIZE	(p17) FMA	f10 = f106, f51, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f107, f52, f11	}	;;	{ .mmf	(p16) LDFPD	f35, f50 = [AO2], 2 * SIZE	(p16) LDFPD	f103, f104 = [BO], 2 * SIZE	(p17) FMA	f8  = f108, f65, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f109, f66, f9	}	;;	{ .mmf	(p16) LDFPD	f105, f106 = [BO], 2 * SIZE	(p16) LDFPD	f64, f65 = [AO1], 2 * SIZE	(p17) FMA	f10 = f108, f67, f10	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f11 = f109, f68, f11	}	;;	{ .mmf	(p16) LDFPD	f51, f66 = [AO2], 2 * SIZE	(p16) LDFPD	f107, f108 = [BO], 2 * SIZE	(p17) FMA	f8  = f110, f81, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f111, f82, f9	}	;;	{ .mmf	(p16) LDFPD	f80, f81 = [AO1], 2 * SIZE	nop	__LINE__	(p17) FMA	f10 = f110, f83, f10	}	{ .mfb	adds	I = -1, I	(p17) FMA	f11 = f111, f84, f11	br.ctop.sptk.few .L132	}	;;	.align 16.L135:	and	I = 15, MIN_M	;;	cmp.eq	p6,  p0 = 0, I	(p6) br.cond.dpnt .L138	;;	tbit.nz	p12, p0 = MIN_M, 3	tbit.nz	p13, p0 = MIN_M, 2	tbit.nz	p14, p0 = MIN_M, 1	tbit.nz	p15, p0 = MIN_M, 0	;;	(p12) LDFPD	f100, f101 = [BO], 2 * SIZE	(p12) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p12) LDFD	f34        = [AO2], 1 * SIZE	;;	(p12) LDFPD	f36,  f37  = [AO1], 2 * SIZE	(p12) LDFPD	f35,  f38  = [AO2], 2 * SIZE	;;	(p12) LDFPD	f102, f103 = [BO],  2 * SIZE	(p12) LDFPD	f39,  f42  = [AO2], 2 * SIZE	;;	(p12) LDFPD	f40,  f41  = [AO1], 2 * SIZE	(p12) LDFPD	f43,  f46  = [AO2], 2 * SIZE	;;	(p12) LDFPD	f104, f105 = [BO], 2 * SIZE	(p12) LDFPD	f44,  f45  = [AO1], 2 * SIZE	(p12) LDFD	f47        = [AO2], 1 * SIZE	;;	(p12) LDFPD	f106, f107 = [BO], 2 * SIZE	(p13) LDFD	f50        = [AO2], 1 * SIZE	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	;;	(p13) LDFPD	f108, f109 = [BO], 2 * SIZE	(p13) LDFPD	f51,  f54  = [AO2], 2 * SIZE	;;	(p13) LDFPD	f110, f111 = [BO], 2 * SIZE	(p13) LDFPD	f52,  f53  = [AO1], 2 * SIZE	(p13) LDFD	f55        = [AO2], 1 * SIZE	;;	(p14) LDFPD	f56,  f57  = [AO1], 2 * SIZE	(p14) LDFD	f58        = [AO2], 1 * SIZE	;;	(p14) LDFPD	f112, f113 = [BO], 2 * SIZE	(p15) LDFD	f60        = [AO1]	(p14) LDFD	f59        = [AO2], 1 * SIZE	;;	(p15) LDFD	f61        = [AO2]	(p15) LDFD	f114       = [BO]	;;	(p12) FMA	f8  = f100, f32, f8	(p12) FMA	f9  = f101, f33, f9	(p12) FMA	f10 = f100, f34, f10	(p12) FMA	f11 = f101, f35, f11	;;	(p12) FMA	f12 = f102, f36, f12	(p12) FMA	f13 = f103, f37, f13	(p12) FMA	f14 = f102, f38, f14	(p12) FMA	f15 = f103, f39, f15	;;	(p12) FMA	f8  = f104, f40, f8	(p12) FMA	f9  = f105, f41, f9	(p12) FMA	f10 = f104, f42, f10	(p12) FMA	f11 = f105, f43, f11	;;	(p12) FMA	f12 = f106, f44, f12	(p12) FMA	f13 = f107, f45, f13	(p12) FMA	f14 = f106, f46, f14	(p12) FMA	f15 = f107, f47, f15	;;	(p13) FMA	f8  = f108, f48, f8	(p13) FMA	f9  = f109, f49, f9	(p13) FMA	f10 = f108, f50, f10	(p13) FMA	f11 = f109, f51, f11	;;	(p13) FMA	f12 = f110, f52, f12	(p13) FMA	f13 = f111, f53, f13	(p13) FMA	f14 = f110, f54, f14	(p13) FMA	f15 = f111, f55, f15	;;	(p14) FMA	f8  = f112, f56, f8	(p14) FMA	f9  = f113, f57, f9	(p14) FMA	f10 = f112, f58, f10	(p14) FMA	f11 = f113, f59, f11	;;	(p15) FMA	f12 = f114, f60, f12	(p15) FMA	f14 = f114, f61, f14	;;.L138:	FADD	f8  = f8,  f9	FADD	f10 = f10, f11	FADD	f12 = f12, f13	FADD	f14 = f14, f15	;;	FADD	f8  = f8,  f12	FADD	f10 = f10, f14	;;	{ .mmf	mov	AO1 = CO	LDFD	f32 = [CO], INCY	}	;;	{ .mmf	LDFD	f33 = [CO], INCY	nop	__LINE__	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f32 = ALPHA, f8,  f32	}	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f33 = ALPHA, f10, f33	}	;;	{ .mmf	STFD [AO1] = f32	add	AO1 = AO1, INCY	}	;;	{ .mmf	STFD [AO1] = f33	}	;;	.align 16.L140:	{ .mfi	mov	AO1 = A	mov	f8  = f0	shr	I = MIN_M, 4	}	{ .mfi	mov	BO  = BUFFER	mov	f10 = f0	tbit.z	p7, p0  = N, 0	}	;;	{ .mfi	cmp.eq	p6, p0 = 0, I	mov	f12 = f0	mov	pr.rot= 0	}	{ .mfb	add	I = I, I	mov	f14 = f0	(p7) br.cond.dpnt .L199	}	;;	{ .mfi	(p8) LDFD f32 = [AO1], SIZE	mov	f9  = f0	mov	ar.ec= 2	}	{ .mmf	(p8) LDFD f40 = [BO], 2 * SIZE	add	A   = A, LDA	mov	f11 = f0	}	;;	{ .mmf	adds	WPRE =  1 * SIZE, CO	adds	PREB  = RPREFETCH * SIZE, BO	mov	f13 = f0	}	{ .mmf	cmp.eq	p16, p0 = r0, r0	adds	I = -1, I	mov	f15 = f0	}	;;	{ .mfi	lfetch.excl.nt1	[WPRE]	(p8) FMPY	f8  = f40, f32	mov	ar.lc = I	}	{ .mmb	nop	__LINE__	nop	__LINE__	(p6) br.cond.dpnt .L145	}	;;	.align 16.L142:	{ .mmf	(p17) LDFPD	f81, f82   = [AO1], 2 * SIZE	(p17) LDFPD	f110, f111 = [BO], 2 * SIZE	(p17) FMA	f8  = f104, f33, f8	}	{ .mfi	nop	__LINE__	(p17) FMA	f9  = f105, f34, f9	(p16) tbit.nz.unc p14, p15 = I, 0	}	;;	{ .mmf	(p16) LDFPD	f32, f33   = [AO1], 2 * SIZE	(p16) LDFPD	f103, f104 = [BO], 2 * SIZE	(p17) FMA	f8  = f106, f49, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f107, f50, f9	}	;;	{ .mmf	(p16) LDFPD	f105, f106 = [BO], 2 * SIZE	(p16) LDFPD	f48, f49 = [AO1], 2 * SIZE	(p17) FMA	f8  = f108, f65, f8	}	{ .mmf	nop	__LINE__	nop	__LINE__	(p17) FMA	f9  = f109, f66, f9	}	;;	{ .mmf	(p16) LDFPD	f64, f65  = [AO1], 2 * SIZE	(p16) LDFPD	f107, f108 = [BO], 2 * SIZE	(p17) FMA	f8  = f110, f81, f8	}	{ .mfb	adds	I = -1, I	(p17) FMA	f9  = f111, f82, f9	br.ctop.sptk.few .L142	}	;;	.align 16.L145:	and	I = 15, MIN_M	;;	cmp.eq	p6,  p0 = 0, I	(p6) br.cond.dpnt .L148	;;	tbit.nz	p12, p0 = MIN_M, 3	tbit.nz	p13, p0 = MIN_M, 2	tbit.nz	p14, p0 = MIN_M, 1	tbit.nz	p15, p0 = MIN_M, 0	;;	(p12) LDFPD	f32,  f33  = [AO1], 2 * SIZE	(p12) LDFPD	f100, f101 = [BO], 2 * SIZE	;;	(p12) LDFPD	f36,  f37  = [AO1], 2 * SIZE	(p12) LDFPD	f102, f103 = [BO], 2 * SIZE	;;	(p12) LDFPD	f40,  f41  = [AO1], 2 * SIZE	(p12) LDFPD	f104, f105 = [BO], 2 * SIZE	;;	(p12) LDFPD	f44,  f45  = [AO1], 2 * SIZE	(p12) LDFPD	f106, f107 = [BO], 2 * SIZE	;;	(p13) LDFPD	f48,  f49  = [AO1], 2 * SIZE	(p13) LDFPD	f108, f109 = [BO], 2 * SIZE	;;	(p13) LDFPD	f52,  f53  = [AO1], 2 * SIZE	(p13) LDFPD	f110, f111 = [BO], 2 * SIZE	;;	(p14) LDFPD	f56,  f57  = [AO1], 2 * SIZE	(p14) LDFPD	f112, f113 = [BO], 2 * SIZE	;;	(p15) LDFD	f60        = [AO1]	(p15) LDFD	f114       = [BO]	;;	(p12) FMA	f8  = f100, f32, f8	(p12) FMA	f9  = f101, f33, f9	(p12) FMA	f10 = f102, f36, f10	(p12) FMA	f11 = f103, f37, f11	(p12) FMA	f12 = f104, f40, f12	(p12) FMA	f13 = f105, f41, f13	(p12) FMA	f14 = f106, f44, f14	(p12) FMA	f15 = f107, f45, f15	;;	(p13) FMA	f8  = f108, f48, f8	(p13) FMA	f9  = f109, f49, f9	(p13) FMA	f10 = f110, f52, f10	(p13) FMA	f11 = f111, f53, f11	(p14) FMA	f12 = f112, f56, f12	(p14) FMA	f13 = f113, f57, f13	(p15) FMA	f14 = f114, f60, f14	;;.L148:	{ .mmf	LDFD	f32 = [CO]	nop	__LINE__	FADD	f8  = f8,  f9	}	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f10 = f10, f11	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f12 = f12, f13	}	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f14 = f14, f15	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f8  = f8,  f12	}	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f10 = f10, f14	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FADD	f8  = f8,  f10	}	;;	{ .mmf	nop	__LINE__	nop	__LINE__	FMA	f32 = ALPHA, f8,  f32	}	;;	{ .mmf	STFD [CO] = f32	nop	__LINE__	nop	__LINE__	}	;;	.align 16.L199:	adds	IS = P, IS	shladd	A  = LDAP, BASE_SHIFT, A	;;	cmp.gt	p6, p0 = M, IS	(p6) br.cond.dptk .LIs_loop	.align 4	;;.L999:	mov	r8 = r0	adds	r9 = 1 * 16, SP	;;	ldf.fill  f16 = [SP], 32	ldf.fill  f17 = [r9], 32	mov	 ar.lc = ARLC	;;		ldf.fill  f18 = [SP], 32	ldf.fill  f19 = [r9], 32	mov pr    = PR, -1	;;		ldf.fill  f20 = [SP], 32	ldf.fill  f21 = [r9], 32	mov	ar.pfs = ARPFS	;;		ldf.fill  f22 = [SP], 32	ldf.fill  f23 = [r9]	br.ret.sptk.many b0	;;	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -