⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zgemv_t.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         
*//*********************************************************************/#define ASSEMBLER#include "common.h"#ifndef NEEDPARAM#ifndef DOUBLE#include "cparam_t.h"#else#include "zparam_t.h"#endif#endif#define P 1024#ifndef __64BIT__#define STACKSIZE 224#else#define STACKSIZE 304#endif#ifdef linux#ifndef __64BIT__#define M	r3#define	N	r4#define A	r6#define LDA	r7#define X	r8#define	INCX	r9#define	Y	r10#define	INCY	r5#else#define M	r3#define	N	r4#define A	r8#define LDA	r9#define X	r10#define	INCX	r5#define	Y	r6#define	INCY	r7#endif#endif#if defined(_AIX) || defined(__APPLE__)#if !defined(__64BIT__) && defined(DOUBLE)#define M	r3#define	N	r4#define A	r10#define LDA	r5#define X	r6#define	INCX	r7#define	Y	r8#define	INCY	r9#else#define M	r3#define	N	r4#define A	r8#define LDA	r9#define X	r10#define	INCX	r5#define	Y	r6#define	INCY	r7#endif#endif#define	BUFFER	r11#define	XP	r12#define	MIN_N	r14#define	J	r15#define CO	r16#define	BO	r17#define	PLDA_M	r18#define	AO1	r19#define	AO2	r20#define	AO3	r21#define	AO4	r22#define IS	r23#define	PREA	r24#define	PREC	r25#define	Y1	r23   /* dummy; should be same as gemv_n.S */#define Y2	r24   /* dummy; should be same as gemv_n.S */#if defined(PPCG4)#define PREFETCHSIZE_A  34#define PREFETCHSIZE_C  16#endif#if defined(PPC440) || defined(PPC440FP2)#define PREFETCHSIZE_A  34#define PREFETCHSIZE_C  16#endif#ifdef PPC970#define PREFETCHSIZE_A  56#define PREFETCHSIZE_C  16#endif#ifdef CELL#define PREFETCHSIZE_A  56#define PREFETCHSIZE_C  16#endif#ifdef POWER4#define PREFETCHSIZE_A  34#define PREFETCHSIZE_C  16#endif#ifdef POWER5#define PREFETCHSIZE_A  24#define PREFETCHSIZE_C   8#endif#if !(defined(CONJ) && defined(XCONJ))#define FMADDR FMADD#define FMSUBR FNMSUB#else#define FMADDR FNMSUB#define FMSUBR FMADD#endif#ifndef NEEDPARAM#ifndef __64BIT__#define FZERO	200(SP)#define ALPHA_R 208(SP)#define ALPHA_I 216(SP)#else#define FZERO	256(SP)#define ALPHA_R 264(SP)#define ALPHA_I 272(SP)#endif	PROLOGUE	PROFCODE	addi	SP, SP,  -STACKSIZE	li	r0,   0	stfd	
f14,     0(SP)	stfd	f15,     8(SP)	stfd	f16,    16(SP)	stfd	f17,    24(SP)	stfd	f18,    32(SP)	stfd	f19,    40(SP)	stfd	f20,    48(SP)	stfd	f21,    56(SP)	stfd	f22,    64(SP)	stfd	f23,    72(SP)	stfd	f24,    80(SP)	stfd	f25,    88(SP)	stfd	f26,    96(SP)	stfd	f27,   104(SP)	stfd	f28,   112(SP)	stfd	f29,   120(SP)	stfd	f30,   128(SP)	stfd	f31,   136(SP)#ifdef __64BIT__	std	r14,   144(SP)	std	r15,   152(SP)	std	r16,   160(SP)	std	r17,   168(SP)	std	r18,   176(SP)	std	r19,   184(SP)	std	r20,   192(SP)	std	r21,   200(SP)	std	r22,   208(SP)	std	r23,   216(SP)	std	r24,   224(SP)	std	r25,   232(SP)	std	r0,    FZERO#else	stw	r14,   144(SP)	stw	r15,   148(SP)	stw	r16,   152(SP)	stw	r17,   156(SP)	stw	r18,   160(SP)	stw	r19,   164(SP)	stw	r20,   168(SP)	stw	r21,   172(SP)	stw	r22,   176(SP)	stw	r23,   180(SP)	stw	r24,   184(SP)	stw	r25,   188(SP)	stw	r0,    FZERO	stw	r0,    4 + FZERO#endif#ifdef linux#ifndef __64BIT__	lwz	INCY,	  8 + STACKSIZE(SP)	lwz	BUFFER,  12 + STACKSIZE(SP)#else	ld	INCX,    112 + STACKSIZE(SP)	ld	Y,       120 + STACKSIZE(SP)	ld	INCY,    128 + STACKSIZE(SP)	ld	BUFFER,  136 + STACKSIZE(SP)#endif#endif#if defined(_AIX) || defined(__APPLE__)#ifndef __64BIT__#ifdef DOUBLE	lwz	LDA,     56 + STACKSIZE(SP)	lwz	X,       60 + STACKSIZE(SP)	lwz	INCX,    64 + STACKSIZE(SP)	lwz	Y,       68 + STACKSIZE(SP)	lwz	INCY,    72 + STACKSIZE(SP)	lwz	BUFFER,  76 + STACKSIZE(SP)#else	lwz	INCX,    56 + STACKSIZE(SP)	lwz	Y,       60 + STACKSIZE(SP)	lwz	INCY,    64 + STACKSIZE(SP)	lwz	BUFFER,  68 + STACKSIZE(SP)#endif#else	ld	INCX,    112 + STACKSIZE(SP)	ld	Y,       120 + STACKSIZE(SP)	ld	INCY,    128 + STACKSIZE(SP)	ld	BUFFER,  136 + STACKSIZE(SP)#endif#endif	stfd	f1, ALPHA_R	stfd	f2, ALPHA_I		mullw	PLDA_M, LDA, N	li	XP,  P	subf	PLDA_M, XP, PLDA_M	slwi	PLDA_M, PLDA_M, ZBASE_SHIFT	slwi	LDA,  LDA,  ZBASE_SHIFT	slwi	INCX, INCX, ZBASE_SHIFT	slwi	INCY, INCY, ZBASE_SHIFT	li	IS,  0	li	PREA, PREFETCHSIZE_A * SIZE	li	PREC, PREFETCHSIZE_C * SIZE	cmpwi	cr0, M, 0	ble	LL(End)	cmpwi	cr0, N, 0	
ble	LL(End)	.align 4LL(ISLoop):	subf	MIN_N, IS, M	slwi	r0, IS, ZBASE_SHIFT	cmpi	cr0, 0, MIN_N, P	ble+	LL(min_nP)	li	MIN_N, PLL(min_nP):	add	XP, X,  r0	cmpwi	cr0, INCX, 2 * SIZE	beq	LL(Main)	mr	XP, BUFFER	addi	CO, BUFFER, -SIZE	srawi.	r0, MIN_N, 2	mtspr	CTR, r0	ble	LL(CopyRemain)	.align 4LL(CopyKernel):	LFD	f0, 0 * SIZE(X)	LFD	f1, 1 * SIZE(X)	add	X, X, INCX	LFD	f2, 0 * SIZE(X)	LFD	f3, 1 * SIZE(X)	add	X, X, INCX	LFD	f4, 0 * SIZE(X)	LFD	f5, 1 * SIZE(X)	add	X, X, INCX	LFD	f6, 0 * SIZE(X)	LFD	f7, 1 * SIZE(X)	add	X, X, INCX	STFD	f0,  1 * SIZE(CO)	STFD	f1,  2 * SIZE(CO)	STFD	f2,  3 * SIZE(CO)	STFD	f3,  4 * SIZE(CO)	STFD	f4,  5 * SIZE(CO)	STFD	f5,  6 * SIZE(CO)	STFD	f6,  7 * SIZE(CO)	STFDU	f7,  8 * SIZE(CO)	bdnz	LL(CopyKernel)	.align 4LL(CopyRemain):	andi.	r0, MIN_N, 3	mtspr	CTR, r0	ble	LL(Main)	.align 4LL(CopySub):	LFD	f0, 0 * SIZE(X)	LFD	f1, 1 * SIZE(X)	add	X, X, INCX	STFD	f0,  1 * SIZE(CO)	STFDU	f1,  2 * SIZE(CO)	bdnz	LL(CopySub)	.align 4LL(Main):	mr	CO, Y	addi	XP, XP, -SIZE	srawi.	J, N, 2	ble	LL(Remain)	.align 4LL(MainHead):	mr     AO1, A	add    AO2, A,   LDA	add    AO3, AO2, LDA	add    AO4, AO3, LDA	add    A,   AO4, LDA	mr     BO, XP	lfd	 f0,  FZERO	fmr	 f1,  f0	fmr	 f2,  f0	fmr	 f3,  f0	fmr	 f4,  f0	fmr	 f5,  f0	fmr	 f6,  f0	fmr	 f7,  f0	fmr	 f8,  f0	fmr	 f9,  f0	fmr	 f10, f0	fmr	 f11, f0	fmr	 f12, f0	fmr	 f13, f0	fmr	 f14, f0	fmr	 f15, f0	dcbtst	 PREC, CO	srawi.	
r0,  MIN_N, 3	mtspr	CTR, r0	ble	LL(MainN3)	LFD	f16, 0 * SIZE(AO1)	LFD	f17, 1 * SIZE(AO1)	LFD	f18, 0 * SIZE(AO2)	LFD	f19, 1 * SIZE(AO2)	LFD	f20, 0 * SIZE(AO3)	LFD	f21, 1 * SIZE(AO3)	LFD	f22, 0 * SIZE(AO4)	LFD	f23, 1 * SIZE(AO4)	LFD	f24, 1 * SIZE(BO)	LFD	f25, 2 * SIZE(BO)	LFD	f26, 3 * SIZE(BO)	LFD	f27, 4 * SIZE(BO)	LFD	f28, 5 * SIZE(BO)	LFD	f29, 6 * SIZE(BO)	LFD	f30, 7 * SIZE(BO)	LFD	f31, 8 * SIZE(BO)	bdz	LL(MainKernelSkip)	.align 5LL(MainKernel):	FMADD	f0,  f16,  f24, f0	FMADD	f1,  f16,  f25, f1	FMADD	f2,  f17,  f24, f2	FMADD	f3,  f17,  f25, f3	FMADD	f4,  f18,  f24, f4	FMADD	f5,  f18,  f25, f5	FMADD	f6,  f19,  f24, f6	FMADD	f7,  f19,  f25, f7	LFD	f16, 2 * SIZE(AO1)	LFD	f17, 3 * SIZE(AO1)	LFD	f18, 2 * SIZE(AO2)	LFD	f19, 3 * SIZE(AO2)	FMADD	f8,  f20,  f24, f8	FMADD	f9,  f20,  f25, f9	FMADD	f10, f21,  f24, f10	FMADD	f11, f21,  f25, f11	FMADD	f12, f22,  f24, f12	FMADD	f13, f22,  f25, f13	FMADD	f14, f23,  f24, f14	FMADD	f15, f23,  f25, f15	LFD	f20, 2 * SIZE(AO3)	LFD	f21, 3 * SIZE(AO3)	LFD	f22, 2 * SIZE(AO4)	LFD	f23, 3 * SIZE(AO4)	FMADD	f0,  f16,  f26, f0	FMADD	f1,  f16,  f27, f1	FMADD	f2,  f17,  f26, f2	FMADD	f3,  f17,  f27, f3	FMADD	f4,  f18,  f26, f4	FMADD	f5,  f18,  f27, f5	FMADD	f6,  f19,  f26, f6	FMADD	f7,  f19,  f27, f7	LFD	f16, 4 * SIZE(AO1)	LFD	f17, 5 * SIZE(AO1)	LFD	f18, 4 * SIZE(AO2)	LFD	f19, 5 * SIZE(AO2)	FMADD	f8,  f20,  f26, f8	FMADD	f9,  f20,  f27, f9	FMADD	f10, f21,  f26, f10	FMADD	f11, f21,  f27, f11	FMADD	f12, f22,  f26, f12	FMADD	f13, f22,  f27, f13	FMADD	f14, f23,  f26, f14	FMADD	f15, f23,  f27, f15	LFD	f20, 4 * SIZE(AO3)	LFD	f21, 5 * SIZE(AO3)	LFD	f22, 4 * SIZE(AO4)	LFD	f23, 5 * SIZE(AO4)	LFD	f24,  9 * SIZE(BO)	LFD	f25, 10 * SIZE(BO)	LFD	f26, 11 * SIZE(BO)	LFD	f27, 12 * SIZE(BO)	FMADD	f0,  f16,  f28, f0	FMADD	f1,  f16,  f29, f1	FMADD	f2,  f17,  f28, f2	FMADD	f3,  f17,  f29, f3	FMADD	f4,  f18,  f28, f4	FMADD	f5,  f18,  f29, f5	FMADD	f6,  f19,  f28, f6	FMADD	f7,  f19,  f29, f7	LFD	f16, 6 * SIZE(AO1)	LFD	f17, 7 * SIZE(AO1)	LFD	f18, 6 * SIZE(AO2)	LFD	f19, 
7 * SIZE(AO2)	FMADD	f8,  f20,  f28, f8	FMADD	f9,  f20,  f29, f9	FMADD	f10, f21,  f28, f10	FMADD	f11, f21,  f29, f11	FMADD	f12, f22,  f28, f12	FMADD	f13, f22,  f29, f13	FMADD	f14, f23,  f28, f14	FMADD	f15, f23,  f29, f15	LFD	f20, 6 * SIZE(AO3)	LFD	f21, 7 * SIZE(AO3)	LFD	f22, 6 * SIZE(AO4)	LFD	f23, 7 * SIZE(AO4)	FMADD	f0,  f16,  f30, f0	FMADD	f1,  f16,  f31, f1	FMADD	f2,  f17,  f30, f2	FMADD	f3,  f17,  f31, f3	FMADD	f4,  f18,  f30, f4	FMADD	f5,  f18,  f31, f5	FMADD	f6,  f19,  f30, f6	FMADD	f7,  f19,  f31, f7	LFD	f16, 8 * SIZE(AO1)	LFD	f17, 9 * SIZE(AO1)	LFD	f18, 8 * SIZE(AO2)	LFD	f19, 9 * SIZE(AO2)	FMADD	f8,  f20,  f30, f8	FMADD	f9,  f20,  f31, f9	FMADD	f10, f21,  f30, f10	FMADD	f11, f21,  f31, f11	FMADD	f12, f22,  f30, f12	FMADD	f13, f22,  f31, f13	FMADD	f14, f23,  f30, f14	FMADD	f15, f23,  f31, f15	LFD	f20, 8 * SIZE(AO3)	LFD	f21, 9 * SIZE(AO3)	LFD	f22, 8 * SIZE(AO4)	LFD	f23, 9 * SIZE(AO4)	LFD	f28, 13 * SIZE(BO)	LFD	f29, 14 * SIZE(BO)	LFD	f30, 15 * SIZE(BO)	LFD	f31, 16 * SIZE(BO)	FMADD	f0,  f16,  f24, f0	FMADD	f1,  f16,  f25, f1	FMADD	f2,  f17,  f24, f2	FMADD	f3,  f17,  f25, f3	FMADD	f4,  f18,  f24, f4	FMADD	f5,  f18,  f25, f5	FMADD	f6,  f19,  f24, f6	FMADD	f7,  f19,  f25, f7	LFD	f16, 10 * SIZE(AO1)	LFD	f17, 11 * SIZE(AO1)	LFD	f18, 10 * SIZE(AO2)	LFD	f19, 11 * SIZE(AO2)	FMADD	f8,  f20,  f24, f8	FMADD	f9,  f20,  f25, f9	FMADD	f10, f21,  f24, f10	FMADD	f11, f21,  f25, f11	FMADD	f12, f22,  f24, f12	FMADD	f13, f22,  f25, f13	FMADD	f14, f23,  f24, f14	FMADD	f15, f23,  f25, f15	LFD	f20, 10 * SIZE(AO3)	LFD	f21, 11 * SIZE(AO3)	LFD	f22, 10 * SIZE(AO4)	LFD	f23, 11 * SIZE(AO4)	FMADD	f0,  f16,  f26, f0	FMADD	f1,  f16,  f27, f1	FMADD	f2,  f17,  f26, f2	FMADD	f3,  f17,  f27, f3	FMADD	f4,  f18,  f26, f4	FMADD	f5,  f18,  f27, f5	FMADD	f6,  f19,  f26, f6	FMADD	f7,  f19,  f27, f7	LFD	f16, 12 * SIZE(AO1)	LFD	f17, 13 * SIZE(AO1)	LFD	f18, 12 * SIZE(AO2)	LFD	f19, 13 * SIZE(AO2)	FMADD	f8,  f20,  f26, f8	FMADD	f9,  f20,  f27, f9	FMADD	f10, f21,  f26, f10	FMADD	f11, f21,  f27, f11	FMADD	
f12, f22,  f26, f12	FMADD	f13, f22,  f27, f13	FMADD	f14, f23,  f26, f14	FMADD	f15, f23,  f27, f15	LFD	f20, 12 * SIZE(AO3)	LFD	f21, 13 * SIZE(AO3)	LFD	f22, 12 * SIZE(AO4)	LFD	f23, 13 * SIZE(AO4)	LFD	f24, 17 * SIZE(BO)	LFD	f25, 18 * SIZE(BO)	LFD	f26, 19 * SIZE(BO)	LFD	f27, 20 * SIZE(BO)	FMADD	f0,  f16,  f28, f0	FMADD	f1,  f16,  f29, f1	FMADD	f2,  f17,  f28, f2	FMADD	f3,  f17,  f29, f3	FMADD	f4,  f18,  f28, f4	FMADD	f5,  f18,  f29, f5	FMADD	f6,  f19,  f28, f6	FMADD	f7,  f19,  f29, f7	LFD	f16, 14 * SIZE(AO1)	LFD	f17, 15 * SIZE(AO1)	LFD	f18, 14 * SIZE(AO2)	LFD	f19, 15 * SIZE(AO2)	FMADD	f8,  f20,  f28, f8	FMADD	f9,  f20,  f29, f9	FMADD	f10, f21,  f28, f10	FMADD	f11, f21,  f29, f11	FMADD	f12, f22,  f28, f12	FMADD	f13, f22,  f29, f13	FMADD	f14, f23,  f28, f14	FMADD	f15, f23,  f29, f15	LFD	f20, 14 * SIZE(AO3)	LFD	f21, 15 * SIZE(AO3)	LFD	f22, 14 * SIZE(AO4)	LFD	f23, 15 * SIZE(AO4)	FMADD	f0,  f16,  f30, f0	FMADD	f1,  f16,  f31, f1	FMADD	f2,  f17,  f30, f2	FMADD	f3,  f17,  f31, f3	FMADD	f4,  f18,  f30, f4	FMADD	f5,  f18,  f31, f5	FMADD	f6,  f19,  f30, f6	FMADD	f7,  f19,  f31, f7	LFD	f16, 16 * SIZE(AO1)	LFD	f17, 17 * SIZE(AO1)	LFD	f18, 16 * SIZE(AO2)	LFD	f19, 17 * SIZE(AO2)	addi	AO1, AO1, 16 * SIZE	addi	AO2, AO2, 16 * SIZE	PREFETCH_A1	PREFETCH_A2	FMADD	f8,  f20,  f30, f8	FMADD	f9,  f20,  f31, f9	FMADD	f10, f21,  f30, f10	FMADD	f11, f21,  f31, f11	FMADD	f12, f22,  f30, f12	FMADD	f13, f22,  f31, f13	FMADD	f14, f23,  f30, f14	FMADD	f15, f23,  f31, f15	LFD	f20, 16 * SIZE(AO3)	LFD	f21, 17 * SIZE(AO3)	LFD	f22, 16 * SIZE(AO4)	LFD	f23, 17 * SIZE(AO4)	LFD	f28, 21 * SIZE(BO)	LFD	f29, 22 * SIZE(BO)	LFD	f30, 23 * SIZE(BO)	LFD	f31, 24 * SIZE(BO)	addi	AO3, AO3, 16 * SIZE	addi	AO4, AO4, 16 * SIZE	PREFETCH_A3	PREFETCH_A4	addi	BO, BO, 16 * SIZE 	bdnz	LL(MainKernel)	.align 4	LL(MainKernelSkip):	FMADD	f0,  f16,  f24, f0	FMADD	f1,  f16,  f25, f1	FMADD	f2,  f17,  f24, f2	FMADD	f3,  f17,  f25, f3	FMADD	f4,  f18,  f24, f4	FMADD	f5,  f18,  f25, f5	FMADD	f6,  f19,  f24, f6	FMADD	f7,  f19,  f25, f7	LFD	
f16, 2 * SIZE(AO1)	LFD	f17, 3 * SIZE(AO1)	LFD	f18, 2 * SIZE(AO2)	LFD	f19, 3 * SIZE(AO2)	FMADD	f8,  f20,  f24, f8	FMADD	f9,  f20,  f25, f9	FMADD	f10, f21,  f24, f10	FMADD	f11, f21,  f25, f11	FMADD	f12, f22,  f24, f12	FMADD	f13, f22,  f25, f13	FMADD	f14, f23,  f24, f14	FMADD	f15, f23,  f25, f15	LFD	f20, 2 * SIZE(AO3)	LFD	f21, 3 * SIZE(AO3)	LFD	f22, 2 * SIZE(AO4)	LFD	f23, 3 * SIZE(AO4)	FMADD	f0,  f16,  f26, f0	FMADD	f1,  f16,  f27, f1	FMADD	f2,  f17,  f26, f2	FMADD	f3,  f17,  f27, f3	FMADD	f4,  f18,  f26, f4	FMADD	f5,  f18,  f27, f5	FMADD	f6,  f19,  f26, f6	FMADD	f7,  f19,  f27, f7	LFD	f16, 4 * SIZE(AO1)	LFD	f17, 5 * SIZE(AO1)	LFD	f18, 4 * SIZE(AO2)	LFD	f19, 5 * SIZE(AO2)	FMADD	f8,  f20,  f26, f8	FMADD	f9,  f20,  f27, f9	FMADD	f10, f21,  f26, f10	FMADD	f11, f21,  f27, f11	FMADD	f12, f22,  f26, f12	FMADD	f13, f22,  f27, f13	FMADD	f14, f23,  f26, f14	FMADD	f15, f23,  f27, f15	LFD	f20, 4 * SIZE(AO3)	LFD	f21, 5 * SIZE(AO3)	LFD	f22, 4 * SIZE(AO4)	LFD	f23, 5 * SIZE(AO4)	FMADD	f0,  f16,  f28, f0	FMADD	f1,  f16,  f29, f1	FMADD	f2,  f17,  f28, f2	FMADD	f3,  f17,  f29, f3	FMADD	f4,  f18,  f28, f4	FMADD	f5,  f18,  f29, f5	FMADD	f6,  f19,  f28, f6	FMADD	f7,  f19,  f29, f7	LFD	f16, 6 * SIZE(AO1)	LFD	f17, 7 * SIZE(AO1)	LFD	f18, 6 * SIZE(AO2)	LFD	f19, 7 * SIZE(AO2)	FMADD	f8,  f20,  f28, f8	FMADD	f9,  f20,  f29, f9	FMADD	f10, f21,  f28, f10	FMADD	f11, f21,  f29, f11	FMADD	f12, f22,  f28, f12	FMADD	f13, f22,  f29, f13	FMADD	f14, f23,  f28, f14	FMADD	f15, f23,  f29, f15	LFD	f20, 6 * SIZE(AO3)	LFD	f21, 7 * SIZE(AO3)	LFD	f22, 6 * SIZE(AO4)	LFD	f23, 7 * SIZE(AO4)	FMADD	f0,  f16,  f30, f0	FMADD	f1,  f16,  f31, f1	FMADD	f2,  f17,  f30, f2	FMADD	f3,  f17,  f31, f3	FMADD	f4,  f18,  f30, f4

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -