⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 saxpy.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         
*//*********************************************************************/#define ASSEMBLER#include "common.h"#define PREFETCHSIZE 64 * 8#define N	r32#define X	r36#define INCX	r37#define Y	r38#define INCY	r39#define PRE1	r2#define PRE2	r3#define I	r14#define J	r15#define Y1	r16#define Y2	r17#define X1	r18#define X2	r19#define INCX16	r20#define INCY16	r21#define YYY	r25#define YY	r27#define XA	r28#define XB	r29#define PR	r30#define ARLC	r31	#define ALPHA	f8#define ALPHA_P	f9	PROLOGUE	.prologue	PROFCODE	{ .mii	shladd	INCX = INCX, BASE_SHIFT, r0	.save ar.lc, ARLC	mov	ARLC = ar.lc	tbit.nz	p10, p0 = X, BASE_SHIFT	}	{ .mfb	cmp.lt	p0, p6 = r0, N	fcmp.eq	p7, p0 = ALPHA, f0	(p6) br.ret.sptk.many b0	}	;;	.body	{ .mmi	(p10) LDFD	f32 = [X], INCX	shladd	INCY = INCY, BASE_SHIFT, r0	mov	PR = pr	}	{ .mib	(p10) adds N = -1, N	mov  YYY = Y 	(p7) br.ret.sptk.many b0	}	;;	{ .mmi	(p10) LDFD	f33 = [Y], INCY	cmp.ne	p13, p0 = SIZE, INCX	shr    XA = X, 2	}	{ .mmi	shladd	INCX16 = INCX, 4, r0	shladd	INCY16 = INCY, 4, r0	nop.i 0	}	;;	{ .mii	mov	Y1 = Y	tbit.nz	p11, p0 = Y, BASE_SHIFT	shr    XB = Y, 2	}	;;	{ .mmf	and	XA = 0x3f, XA	and	XB = 0x3f, XB	(p10) FMA f32 = ALPHA, f32, f33	}	;;	{ .mmi	sub	XA = XB, XA	shladd	Y2 = INCY, 2, Y	mov	pr.rot = 0x10000	}	{ .mbb	cmp.ne p14, p0 = SIZE, INCY	(p13) br.cond.dpnt .L100	(p14) br.cond.dpnt .L100	}	;;	{ .mmi	cmp.gt	p14, p0 =  r0, XA	;;	and	J =  15, N	shr	I =  N, 4	}	{ .mfb	(p14) adds XA = 64, XA	fpack	ALPHA_P = f8, f8	(p11) br.cond.dpnt .L30	}	;;	{ .mmi	cmp.gt	p14, p0 =  32, XA	cmp.lt	p15, p0 =  58, XA	mov	ar.ec = 3	}	{ .mmi	and	J =  31, N	cmp.eq	p16, p0 = r0, r0	shr	I =  N, 5	}	;;	{ .mmi	cmp.eq	p9, p0  =   r0, J	cmp.eq	p7 ,p0  = 0, I	adds	I = -1, I	}	{ .mbb	nop.m 0	(p14) br.cond.dpnt .L20	(p15) br.cond.dpnt .L20	}	;;	{ .mmi	(p10) STFD [YYY] = f32	adds	PRE1 = PREFETCHSIZE * SIZE, X	mov	ar.lc = I	}	{ .mib	adds	PRE2 = (PREFETCHSIZE - 24) * SIZE, Y	tbit.z	p0, p11 = N, 4	(p7) br.cond.dpnt  .L15	}	;;	.align 32.L12:/* 0 */	{ .mmf	(p18) stf8	[Y1] = f6,   2 * SIZE	
(p16) lfetch.nt1 [PRE1], 32 * SIZE	(p18) fpma	f12  = ALPHA_P, f46, f94	}	{ .mmi	(p16) ldf8	f32 = [X], 2 * SIZE	(p16) ldf8	f80 = [Y], 2 * SIZE	}	;;/* 1 */	{ .mmf	(p18) stf8	[Y1] = f7,  2 * SIZE	(p16) lfetch.excl.nt1	[PRE2], 32 * SIZE	(p18) fpma	f13  = ALPHA_P, f49, f97	}	{ .mmi	(p16) ldf8	f35 = [X], 2 * SIZE	(p16) ldf8	f83 = [Y], 2 * SIZE	}	;;/* 2 */	{ .mmf	(p18) stf8	[Y1] = f10,  2 * SIZE	(p18) fpma	f14  = ALPHA_P, f52, f100	}	{ .mmi	(p16) ldf8	f38 = [X], 2 * SIZE	(p16) ldf8	f86 = [Y], 2 * SIZE	}	;;/* 3 */	{ .mmf	(p18) stf8	[Y1] = f11, 2 * SIZE	(p18) fpma	f15  = ALPHA_P, f55, f103	}	{ .mmi	(p16) ldf8	f41 = [X], 2 * SIZE	(p16) ldf8	f89 = [Y], 2 * SIZE	}	;;/* 4 */	{ .mmf	(p18) stf8	[Y1] = f12,  2 * SIZE	(p18) fpma	f6   = ALPHA_P, f58, f106	}	{ .mmi	(p16) ldf8	f44  = [X], 2 * SIZE	(p16) ldf8	f92  = [Y], 2 * SIZE	}	;;/* 5 */	{ .mmf	(p18) stf8	[Y1] = f13,  2 * SIZE	(p18) fpma	f7   = ALPHA_P, f61, f109	}	{ .mmi	(p16) ldf8	f47  = [X], 2 * SIZE	(p16) ldf8	f95  = [Y], 2 * SIZE	}	;;/* 6 */	{ .mmf	(p18) stf8	[Y1] = f14,  2 * SIZE	(p18) fpma	f10  = ALPHA_P, f64, f112	}	{ .mmi	(p16) ldf8	f50  = [X], 2 * SIZE	(p16) ldf8	f98  = [Y], 2 * SIZE	}	;;/* 7 */	{ .mmf	(p18) stf8	[Y1] = f15, 2 * SIZE	(p18) fpma	f11  = ALPHA_P, f67, f115	}	{ .mmi	(p16) ldf8	f53  = [X], 2 * SIZE	(p16) ldf8	f101 = [Y], 2 * SIZE	}	;;/* 8 */	{ .mmf	(p18) stf8	[Y1] = f6,   2 * SIZE	(p18) fpma	f12  = ALPHA_P, f70, f118	}	{ .mmi	(p16) ldf8	f56 = [X], 2 * SIZE	(p16) ldf8	f104 = [Y], 2 * SIZE	}	;;/* 9 */	{ .mmf	(p18) stf8	[Y1] = f7,  2 * SIZE	(p18) fpma	f13  = ALPHA_P, f73, f121	}	{ .mmi	(p16) ldf8	f59 = [X], 2 * SIZE	(p16) ldf8	f107 = [Y], 2 * SIZE	}	;;/* 10 */	{ .mmf	(p18) stf8	[Y1] = f10,  2 * SIZE	(p18) fpma	f14 = ALPHA_P, f76, f124	}	{ .mmi	(p16) ldf8	f62 = [X], 2 * SIZE	(p16) ldf8	f110 = [Y], 2 * SIZE	}	;;/* 11 */	{ .mmf	(p18) stf8	[Y1] = f11, 2 * SIZE	(p18) fpma	f15  = ALPHA_P, f79, f127	}	{ .mmi	(p16) ldf8	f65 = [X], 2 * SIZE	(p16) ldf8	f113 = [Y], 2 * SIZE	}	;;/* 12 */	{ .mmf	(p18) stf8	[Y1] = f12,  2 * 
SIZE	(p17) fpma	f6   = ALPHA_P, f33, f81	}	{ .mmi	(p16) ldf8	f68  = [X], 2 * SIZE	(p16) ldf8	f116 = [Y], 2 * SIZE	}	;;/* 13 */	{ .mmf	(p18) stf8	[Y1] = f13,  2 * SIZE	(p17) fpma	f7   = ALPHA_P, f36, f84	}	{ .mmi	(p16) ldf8	f71  = [X], 2 * SIZE	(p16) ldf8	f119 = [Y], 2 * SIZE	}	;;/* 14 */	{ .mmf	(p18) stf8	[Y1] = f14,  2 * SIZE	(p17) fpma	f10  = ALPHA_P, f39, f87	}	{ .mmi	(p16) ldf8	f74  = [X], 2 * SIZE	(p16) ldf8	f122 = [Y], 2 * SIZE	}	;;/*15 */	{ .mmf	(p18) stf8	[Y1] = f15, 2 * SIZE	(p17) fpma	f11  = ALPHA_P, f42, f90	}	{ .mmb	(p16) ldf8	f77  = [X], 2 * SIZE	(p16) ldf8	f125 = [Y], 2 * SIZE	br.ctop.sptk.few .L12	}	;;	.align 32.L15:	{ .mmi	(p11) ldf8	f32 = [X], 2 * SIZE	(p11) ldf8	f33 = [Y], 2 * SIZE	mov	pr = PR, -65474	}	;;	{ .mmi	(p11) ldf8	f34 = [X], 2 * SIZE	(p11) ldf8	f35 = [Y], 2 * SIZE	mov	ar.lc  = ARLC	}	;;	{ .mmb	(p11) ldf8	f36 = [X], 2 * SIZE	(p11) ldf8	f37 = [Y], 2 * SIZE	(p9) br.ret.sptk.many b0	}	;;	{ .mmi	(p11) ldf8	f38 = [X], 2 * SIZE	(p11) ldf8	f39 = [Y], 2 * SIZE	tbit.z	p0, p12 = N, 3	}	;;	{ .mmi	(p11) ldf8	f40 = [X], 2 * SIZE	(p11) ldf8	f41 = [Y], 2 * SIZE	tbit.z	p0, p13 = N, 2	}	;;	{ .mmi	(p11) ldf8	f42 = [X], 2 * SIZE	(p11) ldf8	f43 = [Y], 2 * SIZE	tbit.z	p0, p14 = N, 1	}	;;	{ .mmf	(p11) ldf8	f44 = [X], 2 * SIZE	(p11) ldf8	f45 = [Y], 2 * SIZE	(p11) fpma	f6  = ALPHA_P, f32, f33	}	;;	{ .mmf	(p11) ldf8	f46 = [X], 2 * SIZE	(p11) ldf8	f47 = [Y], 2 * SIZE	(p11) fpma	f7  = ALPHA_P, f34, f35	}	;;	{ .mmf	(p12) ldf8	f48 = [X], 2 * SIZE	(p12) ldf8	f49 = [Y], 2 * SIZE	(p11) fpma	f10 = ALPHA_P, f36, f37	}	;;	{ .mmi	(p11) stf8	[Y1] = f6,   2 * SIZE	nop.m 0	tbit.z	p0, p15 = N, 0	}	{ .mmf	(p12) ldf8	f50 = [X], 2 * SIZE	(p12) ldf8	f51 = [Y], 2 * SIZE	(p11) fpma	f11 = ALPHA_P, f38, f39	}	;;	{ .mmi	(p11) stf8	[Y1] = f7,   2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p12) ldf8	f52 = [X], 2 * SIZE	(p12) ldf8	f53 = [Y], 2 * SIZE	}	;;	{ .mmi	(p11) stf8	[Y1] = f10,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p12) ldf8	f54 = [X], 2 * SIZE	(p12) ldf8	f55 = [Y], 2 * SIZE	(p11) fpma	f12 = 
ALPHA_P, f40, f41	}	;;	{ .mmi	(p11) stf8	[Y1] = f11,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p13) ldf8	f56 = [X], 2 * SIZE	(p13) ldf8	f57 = [Y], 2 * SIZE	(p11) fpma	f13 = ALPHA_P, f42, f43	}	;;	{ .mmi	(p11) stf8	[Y1] = f12,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p13) ldf8	f58 = [X], 2 * SIZE	(p13) ldf8	f59 = [Y], 2 * SIZE	(p11) fpma	f14 = ALPHA_P, f44, f45	}	;;	{ .mmi	(p11) stf8	[Y1] = f13,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p14) ldf8	f60 = [X], 2 * SIZE	(p14) ldf8	f61 = [Y], 2 * SIZE	(p11) fpma	f15 = ALPHA_P, f46, f47	}	;;	{ .mmi	(p11) stf8	[Y1] = f14,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p15) ldfs	f62  = [X]	(p15) ldfs	f63  = [Y]	(p12) fpma	f6  = ALPHA_P, f48, f49	}	;;	(p12) fpma	f7  = ALPHA_P, f50, f51	(p12) fpma	f10 = ALPHA_P, f52, f53	;;	(p11) stf8	[Y1] = f15,  2 * SIZE	(p12) fpma	f11 = ALPHA_P, f54, f55	;;	(p12) stf8	[Y1] = f6,   2 * SIZE	(p13) fpma	f12 = ALPHA_P, f56, f57	;;	(p12) stf8	[Y1] = f7,   2 * SIZE	(p13) fpma	f13 = ALPHA_P, f58, f59	;;	(p12) stf8	[Y1] = f10,  2 * SIZE	(p14) fpma	f14 = ALPHA_P, f60, f61	;;	(p12) stf8	[Y1] = f11,  2 * SIZE	(p15) FMA	f15 = ALPHA,   f62, f63	;;	(p13) stf8	[Y1] = f12,  2 * SIZE	;;	(p13) stf8	[Y1] = f13,  2 * SIZE	;;	(p14) stf8	[Y1] = f14,  2 * SIZE	;;	(p15) stfs	[Y1] = f15	br.ret.sptk.many b0	;;	.align 32/* X is aligned; case 2 */.L20:	{ .mmi	(p10) STFD [YYY] = f32	adds	PRE1 = (PREFETCHSIZE - 28) *  SIZE, X	mov	ar.lc = I	}	{ .mib	adds	PRE2 = (PREFETCHSIZE +  4) * SIZE, Y	tbit.z	p0, p11 = N, 4	(p7) br.cond.dpnt  .L25	}	;;	.align 32.L22:/* 0 */	{ .mmf	(p18) stf8	[Y1] = f6,   2 * SIZE	(p16) lfetch.nt1	[PRE1], 32 * SIZE	(p18) fpma	f12  = ALPHA_P, f46, f94	}	{ .mmi	(p17) ldf8	f60  = [X], 2 * SIZE	(p16) ldf8	f80  = [Y], 2 * SIZE	}	;;/* 1 */	{ .mmf	(p18) stf8	[Y1] = f7,  2 * SIZE	(p16) lfetch.excl.nt1	[PRE2], 32 * SIZE	(p18) fpma	f13  = ALPHA_P, f49, f97	}	{ .mmi	(p17) ldf8	f63  = [X], 2 * SIZE	(p16) ldf8	f83  = [Y], 2 * SIZE	}	;;/* 2 */	{ .mmf	(p18) stf8	[Y1] = f10,  2 * SIZE	(p18) fpma	f14  = ALPHA_P, f52, f100	}	{ .mmi	(p17) ldf8	
f66  = [X], 2 * SIZE		(p16) ldf8	f86  = [Y], 2 * SIZE	}	;;/* 3 */	{ .mmf	(p18) stf8	[Y1] = f11, 2 * SIZE	(p18) fpma	f15  = ALPHA_P, f55, f103	}	{ .mmi	(p17) ldf8	f69  = [X], 2 * SIZE	(p16) ldf8	f89  = [Y], 2 * SIZE	}	;;/* 4 */	{ .mmf	(p18) stf8	[Y1] = f12,  2 * SIZE	(p18) fpma	f6   = ALPHA_P, f58, f106	}	{ .mmi	(p17) ldf8	f72  = [X], 2 * SIZE	(p16) ldf8	f92  = [Y], 2 * SIZE	}	;;/* 5 */	{ .mmf	(p18) stf8	[Y1] = f13,  2 * SIZE	(p18) fpma	f7   = ALPHA_P, f61, f109	}	{ .mmi	(p17) ldf8	f75  = [X], 2 * SIZE	(p16) ldf8	f95  = [Y], 2 * SIZE	}	;;/* 6 */	{ .mmf	(p18) stf8	[Y1] = f14,  2 * SIZE	(p18) fpma	f10  = ALPHA_P, f64, f112	}	{ .mmi	(p17) ldf8	f78  = [X], 2 * SIZE	(p16) ldf8	f98  = [Y], 2 * SIZE	}	;;/* 7 */	{ .mmf	(p18) stf8	[Y1] = f15, 2 * SIZE	(p18) fpma	f11  = ALPHA_P, f67, f115	}	{ .mmi	(p16) ldf8	f32  = [X], 2 * SIZE	(p16) ldf8	f101 = [Y], 2 * SIZE	}	;;/* 8 */	{ .mmf	(p18) stf8	[Y1] = f6,   2 * SIZE	(p18) fpma	f12  = ALPHA_P, f70, f118	}	{ .mmi	(p16) ldf8	f35 = [X], 2 * SIZE	(p16) ldf8	f104 = [Y], 2 * SIZE	}	;;/* 9 */	{ .mmf	(p18) stf8	[Y1] = f7,  2 * SIZE	(p18) fpma	f13  = ALPHA_P, f73, f121	}	{ .mmi	(p16) ldf8	f38 = [X], 2 * SIZE	(p16) ldf8	f107 = [Y], 2 * SIZE	}	;;/* 10 */	{ .mmf	(p18) stf8	[Y1] = f10,  2 * SIZE	(p18) fpma	f14 = ALPHA_P, f76, f124	}	{ .mmi	(p16) ldf8	f41 = [X], 2 * SIZE	(p16) ldf8	f110 = [Y], 2 * SIZE	}	;;/* 11 */	{ .mmf	(p18) stf8	[Y1] = f11, 2 * SIZE	(p18) fpma	f15  = ALPHA_P, f79, f127	}	{ .mmi	(p16) ldf8	f44  = [X], 2 * SIZE	(p16) ldf8	f113 = [Y], 2 * SIZE	}	;;/* 12 */	{ .mmf	(p18) stf8	[Y1] = f12,  2 * SIZE	(p17) fpma	f6   = ALPHA_P, f33, f81	}	{ .mmi	(p16) ldf8	f47  = [X], 2 * SIZE	(p16) ldf8	f116 = [Y], 2 * SIZE	}	;;/* 13 */	{ .mmf	(p18) stf8	[Y1] = f13,  2 * SIZE	(p17) fpma	f7   = ALPHA_P, f36, f84	}	{ .mmi	(p16) ldf8	f50  = [X], 2 * SIZE	(p16) ldf8	f119 = [Y], 2 * SIZE	}	;;/* 14 */	{ .mmf	(p18) stf8	[Y1] = f14,  2 * SIZE	(p17) fpma	f10  = ALPHA_P, f39, f87	}	{ .mmi	(p16) ldf8	f53  = [X], 2 * SIZE	(p16) ldf8	f122 = [Y], 2 * SIZE	}	
;;/*15 */	{ .mmf	(p18) stf8	[Y1] = f15, 2 * SIZE	(p17) fpma	f11  = ALPHA_P, f42, f90	}	{ .mmb	(p16) ldf8	f56 = [X], 2 * SIZE	(p16) ldf8	f125 = [Y], 2 * SIZE	br.ctop.sptk.few .L22	}	;;	.align 32.L25:	{ .mmi	(p11) ldf8	f32 = [X], 2 * SIZE	(p11) ldf8	f33 = [Y], 2 * SIZE	mov	pr = PR, -65474	}	;;	{ .mmi	(p11) ldf8	f34 = [X], 2 * SIZE	(p11) ldf8	f35 = [Y], 2 * SIZE	mov	ar.lc  = ARLC	}	;;	{ .mmb	(p11) ldf8	f36 = [X], 2 * SIZE	(p11) ldf8	f37 = [Y], 2 * SIZE	(p9) br.ret.sptk.many b0	}	;;	{ .mmi	(p11) ldf8	f38 = [X], 2 * SIZE	(p11) ldf8	f39 = [Y], 2 * SIZE	tbit.z	p0, p12 = N, 3	}	;;	{ .mmi	(p11) ldf8	f40 = [X], 2 * SIZE	(p11) ldf8	f41 = [Y], 2 * SIZE	tbit.z	p0, p13 = N, 2	}	;;	{ .mmi	(p11) ldf8	f42 = [X], 2 * SIZE	(p11) ldf8	f43 = [Y], 2 * SIZE	tbit.z	p0, p14 = N, 1	}	;;	{ .mmf	(p11) ldf8	f44 = [X], 2 * SIZE	(p11) ldf8	f45 = [Y], 2 * SIZE	(p11) fpma	f6  = ALPHA_P, f32, f33	}	;;	{ .mmf	(p11) ldf8	f46 = [X], 2 * SIZE	(p11) ldf8	f47 = [Y], 2 * SIZE	(p11) fpma	f7  = ALPHA_P, f34, f35	}	;;	{ .mmf	(p12) ldf8	f48 = [X], 2 * SIZE	(p12) ldf8	f49 = [Y], 2 * SIZE	(p11) fpma	f10 = ALPHA_P, f36, f37	}	;;	{ .mmi	(p11) stf8	[Y1] = f6,   2 * SIZE	nop.m 0	tbit.z	p0, p15 = N, 0	}	{ .mmf	(p12) ldf8	f50 = [X], 2 * SIZE	(p12) ldf8	f51 = [Y], 2 * SIZE	(p11) fpma	f11 = ALPHA_P, f38, f39	}	;;	{ .mmi	(p11) stf8	[Y1] = f7,   2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p12) ldf8	f52 = [X], 2 * SIZE	(p12) ldf8	f53 = [Y], 2 * SIZE	}	;;	{ .mmi	(p11) stf8	[Y1] = f10,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p12) ldf8	f54 = [X], 2 * SIZE	(p12) ldf8	f55 = [Y], 2 * SIZE	(p11) fpma	f12 = ALPHA_P, f40, f41	}	;;	{ .mmi	(p11) stf8	[Y1] = f11,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p13) ldf8	f56 = [X], 2 * SIZE	(p13) ldf8	f57 = [Y], 2 * SIZE	(p11) fpma	f13 = ALPHA_P, f42, f43	}	;;	{ .mmi	(p11) stf8	[Y1] = f12,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p13) ldf8	f58 = [X], 2 * SIZE	(p13) ldf8	f59 = [Y], 2 * SIZE	(p11) fpma	f14 = ALPHA_P, f44, f45	}	;;	{ .mmi	(p11) stf8	[Y1] = f13,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p14) ldf8	f60 = [X], 
2 * SIZE	(p14) ldf8	f61 = [Y], 2 * SIZE	(p11) fpma	f15 = ALPHA_P, f46, f47	}	;;	{ .mmi	(p11) stf8	[Y1] = f14,  2 * SIZE	nop.m 0	nop.i 0	}	{ .mmf	(p15) ldfs	f62  = [X]	(p15) ldfs	f63  = [Y]	(p12) fpma	f6  = ALPHA_P, f48, f49	}	;;	(p12) fpma	f7  = ALPHA_P, f50, f51	(p12) fpma	f10 = ALPHA_P, f52, f53	;;	(p11) stf8	[Y1] = f15,  2 * SIZE	(p12) fpma	f11 = ALPHA_P, f54, f55	;;	(p12) stf8	[Y1] = f6,   2 * SIZE	(p13) fpma	f12 = ALPHA_P, f56, f57	;;	(p12) stf8	[Y1] = f7,   2 * SIZE	(p13) fpma	f13 = ALPHA_P, f58, f59	;;	(p12) stf8	[Y1] = f10,  2 * SIZE	(p14) fpma	f14 = ALPHA_P, f60, f61	;;	(p12) stf8	[Y1] = f11,  2 * SIZE	(p15) FMA	f15 = ALPHA,   f62, f63	;;	(p13) stf8	[Y1] = f12,  2 * SIZE	;;	(p13) stf8	[Y1] = f13,  2 * SIZE	;;	(p14) stf8	[Y1] = f14,  2 * SIZE	;;	(p15) stfs	[Y1] = f15

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -