⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 trsm_kernel_cell_rt.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
/*********************************************************************/
/*                                                                   */
/*             Optimized BLAS libraries                              */
/*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
/*                                                                   */
/* Copyright (c) The University of Texas, 2005. All rights reserved. */
/* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
/* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
/* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
/* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
/* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
/* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
/* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
/* Under no circumstances shall University be liable for incidental, */
/* special, indirect, direct or consequential damages or loss of     */
/* profits, interruption of business, or related expenses which may  */
/* arise from use of Software or Documentation, including but not    */
/* limited to those resulting from defects in Software and/or        */
/* Documentation, or loss or inaccuracy of data of any kind.         
*//*********************************************************************/#define ASSEMBLER#include "common.h"		#ifndef __64BIT__#define LOAD	lwz#else#define LOAD	ld#endif#ifdef __64BIT__#define STACKSIZE 320#define ALPHA   296(SP)#define FZERO	304(SP)#else#define STACKSIZE 240#define ALPHA   224(SP)#define FZERO	232(SP)#endif#define	M	r3#define	N	r4#define	K	r5#ifdef linux#ifndef __64BIT__#define A	r6#define	B	r7#define	C	r8#define	LDC	r9#define OFFSET	r10#else#define A	r7#define	B	r8#define	C	r9#define	LDC	r10#define OFFSET	r6#endif#endif#if defined(_AIX) || defined(__APPLE__)#if !defined(__64BIT__) && defined(DOUBLE)#define A	r8#define	B	r9#define	C	r10#define	LDC	r7#define OFFSET	r6#else#define A	r7#define	B	r8#define	C	r9#define	LDC	r10#define OFFSET	r6#endif#endif#define AORIG	r18#define TEMP	r19#define KK	r20#define	I	r21#define J	r22#define AO	r23#define	BO	r24#define	CO1	r25#define CO2	r26#define	CO3	r27#define	CO4	r28#define PREA	r29#define PREB	r30#define PREC	r31#ifndef NEEDPARAM#ifndef DOUBLE#include "sparam.h"#else#include "dparam.h"#endif	PROLOGUE	PROFCODE	addi	SP, SP, -STACKSIZE	li	r0, 0	stfd	f14,    0(SP)	stfd	f15,    8(SP)	stfd	f16,   16(SP)	stfd	f17,   24(SP)	stfd	f18,   32(SP)	stfd	f19,   40(SP)	stfd	f20,   48(SP)	stfd	f21,   56(SP)	stfd	f22,   64(SP)	stfd	f23,   72(SP)	stfd	f24,   80(SP)	stfd	f25,   88(SP)	stfd	f26,   96(SP)	stfd	f27,  104(SP)	stfd	f28,  112(SP)	stfd	f29,  120(SP)	stfd	f30,  128(SP)	stfd	f31,  136(SP)#ifdef __64BIT__	std	r31,  144(SP)	std	r30,  152(SP)	std	r29,  160(SP)	std	r28,  168(SP)	std	r27,  176(SP)	std	r26,  184(SP)	std	r25,  192(SP)	std	r24,  200(SP)	std	r23,  208(SP)	std	r22,  216(SP)	std	r21,  224(SP)	std	r20,  232(SP)	std	r19,  240(SP)	std	r18,  248(SP)#else	stw	r31,  144(SP)	stw	r30,  148(SP)	stw	r29,  152(SP)	stw	r28,  156(SP)	stw	r27,  160(SP)	stw	r26,  164(SP)	stw	r25,  168(SP)	stw	r24,  172(SP)	stw	r23,  176(SP)	stw	r22,  180(SP)	stw	r21,  184(SP)	stw	r20,  188(SP)	stw	r19,  192(SP)	stw	r18,  196(SP)#endif	stw	
r0,  FZERO#if defined(_AIX) || defined(__APPLE__)#if !defined(__64BIT__) && defined(DOUBLE)	lwz	LDC,    56 + STACKSIZE(SP)#endif#endif	slwi	LDC, LDC, BASE_SHIFT#if defined(linux) && defined(__64BIT__)	ld	OFFSET,   112 + STACKSIZE(SP)#endif#if defined(_AIX) || defined(__APPLE__)#ifdef __64BIT__	ld	OFFSET,  112 + STACKSIZE(SP)#else#ifdef DOUBLE	lwz	OFFSET,   60 + STACKSIZE(SP)#else	lwz	OFFSET,   56 + STACKSIZE(SP)#endif#endif#endif#ifdef LN	mullw	r0, M, K	slwi	r0, r0, BASE_SHIFT	add	A, A, r0	slwi	r0, M, BASE_SHIFT	add	C, C, r0#endif#ifdef RN	neg	KK, OFFSET#endif#ifdef RT	mullw	r0, N, K	slwi	r0, r0, BASE_SHIFT	add	B, B, r0	mullw	r0, N, LDC	add	C, C, r0	sub	KK, N, OFFSET#endif	cmpwi	cr0, M, 0	ble	LL(999)	cmpwi	cr0, N, 0	ble	LL(999)	cmpwi	cr0, K, 0	ble	LL(999)#ifndef PREFETCHTEST	li	PREC,  -4 * SIZE#else#ifdef linux#ifndef __64BIT__	mr	PREA,  r10		lwz	PREB,   8 + STACKSIZE(SP)	lwz	PREC,  12 + STACKSIZE(SP)#else	ld	PREA,  112 + STACKSIZE(SP)	ld	PREB,  120 + STACKSIZE(SP)	ld	PREC,  128 + STACKSIZE(SP)#endif#endif#if defined(_AIX) || defined(__APPLE__)#ifdef __64BIT__	ld	PREA,  112 + STACKSIZE(SP)	ld	PREB,  120 + STACKSIZE(SP)	ld	PREC,  128 + STACKSIZE(SP)#else#ifdef DOUBLE	lwz	PREA,   60 + STACKSIZE(SP)	lwz	PREB,   64 + STACKSIZE(SP)	lwz	PREC,   68 + STACKSIZE(SP)#else	lwz	PREA,   56 + STACKSIZE(SP)	lwz	PREB,   60 + STACKSIZE(SP)	lwz	PREC,   64 + STACKSIZE(SP)#endif#endif#endif#endif#ifndef PREFETCHTEST#ifdef PPC970#ifdef ALLOC_HUGETLB	li	PREA,   (16 *  5 * SIZE | 1)	li	PREB,   (16 *  5 * SIZE | 3)#else	li	PREA,   (16 * 14 * SIZE | 1)	li	PREB,   (16 *  8 * SIZE | 3)#endif#endif#ifdef POWER4#ifdef ALLOC_HUGETLB	li	PREA,   (16 *  1 * SIZE + 16)	li	PREB,   (16 *  1 * SIZE + 16)#else	li	PREA,   (16 *  2 * SIZE + 16)	li	PREB,   (16 *  2 * SIZE + 16)#endif#endif#ifdef POWER5#ifdef ALLOC_HUGETLB	li	PREA,   (16 *  7 * SIZE | 1)	li	PREB,   (16 *  7 * SIZE | 3)#else	li	PREA,   (16 * 12 * SIZE | 1)	li	PREB,   (16 *  6 * SIZE | 3)#endif#endif#ifdef CELL	li	PREA,   (16 * 12 * SIZE)	li	
PREB,   (16 * 12 * SIZE)#endif#endif	lfs	f0, FZEROLL(70):	andi.	J, N,  1	ble	LL(40)#ifdef RT	slwi	r0, K, 0 + BASE_SHIFT	sub	B, B, r0	sub	C, C, LDC#endif	mr	CO1, C#ifdef LN	add	KK, M, OFFSET#endif#ifdef LT	mr	KK, OFFSET#endif 	fmr	f1,  f0	fmr	f2,  f0	fmr	f3,  f0	srawi.	I, M,  2#if defined(LN) || defined(RT)	mr	AORIG, A#else	mr	AO, A#endif#ifndef RT	add	C,  CO1, LDC#endif	ble	LL(80)	.align 4LL(71):#if defined(LT) || defined(RN)	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(B)	LFD	f21,  1 * SIZE(B)	LFD	f22,  2 * SIZE(B)	LFD	f23,  3 * SIZE(B)	dcbt	CO1, PREC	srawi.	r0, KK,  2	mtspr	CTR, r0	mr	BO,  B#else#ifdef LN	slwi	r0,   K,  2 + BASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0,   KK, 2 + BASE_SHIFT	slwi	TEMP, KK, 0 + BASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(BO)	LFD	f21,  1 * SIZE(BO)	LFD	f22,  2 * SIZE(BO)	LFD	f23,  3 * SIZE(BO)	dcbt	CO1, PREC	srawi.	
r0, TEMP,  2	mtspr	CTR, r0#endif	ble	LL(75)	.align 5LL(72):	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f20, f1	FMADD	f2,  f18, f20, f2	FMADD	f3,  f19, f20, f3	LFD	f16,  4 * SIZE(AO)	LFD	f17,  5 * SIZE(AO)	LFD	f18,  6 * SIZE(AO)	LFD	f19,  7 * SIZE(AO)	FMADD	f0,  f16, f21, f0	FMADD	f1,  f17, f21, f1	FMADD	f2,  f18, f21, f2	FMADD	f3,  f19, f21, f3	LFD	f16,  8 * SIZE(AO)	LFD	f17,  9 * SIZE(AO)	LFD	f18, 10 * SIZE(AO)	LFD	f19, 11 * SIZE(AO)	FMADD	f0,  f16, f22, f0	FMADD	f1,  f17, f22, f1	FMADD	f2,  f18, f22, f2	FMADD	f3,  f19, f22, f3	LFD	f16, 12 * SIZE(AO)	LFD	f17, 13 * SIZE(AO)	LFD	f18, 14 * SIZE(AO)	LFD	f19, 15 * SIZE(AO)	FMADD	f0,  f16, f23, f0	FMADD	f1,  f17, f23, f1	FMADD	f2,  f18, f23, f2	FMADD	f3,  f19, f23, f3	LFD	f16, 16 * SIZE(AO)	LFD	f17, 17 * SIZE(AO)	LFD	f18, 18 * SIZE(AO)	LFD	f19, 19 * SIZE(AO)	LFD	f20,  4 * SIZE(BO)	LFD	f21,  5 * SIZE(BO)	LFD	f22,  6 * SIZE(BO)	LFD	f23,  7 * SIZE(BO)	addi	AO, AO, 16 * SIZE	addi	BO, BO,  4 * SIZE	PREFETCH_B	bdnz	LL(72)	.align 4LL(75):#if defined(LT) || defined(RN)	andi.	r0, KK,  3#else	andi.	
r0, TEMP, 3#endif	mtspr	CTR, r0	ble+	LL(78)	.align 4LL(76):	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f20, f1	FMADD	f2,  f18, f20, f2	FMADD	f3,  f19, f20, f3	LFD	f16,  4 * SIZE(AO)	LFD	f17,  5 * SIZE(AO)	LFD	f18,  6 * SIZE(AO)	LFD	f19,  7 * SIZE(AO)	LFD	f20,  1 * SIZE(BO)	addi	BO, BO,  1 * SIZE	addi	AO, AO,  4 * SIZE	bdnz	LL(76)	.align 4LL(78):#if defined(LN) || defined(RT)#ifdef LN	subi	r0, KK, 4#else	subi	r0, KK, 1#endif	slwi	TEMP, r0, 2 + BASE_SHIFT	slwi	r0,   r0, 0 + BASE_SHIFT	add	AO, AORIG, TEMP	add	BO, B,     r0#endif#if defined(LN) || defined(LT)	LFD	f16,  0 * SIZE(BO)	LFD	f20,  1 * SIZE(BO)	LFD	f24,  2 * SIZE(BO)	LFD	f28,  3 * SIZE(BO)	FSUB	f0,  f16, f0	FSUB	f1,  f20, f1	FSUB	f2,  f24, f2	FSUB	f3,  f28, f3#else	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO) 	FSUB	f0,  f16, f0	FSUB	f1,  f17, f1	FSUB	f2,  f18, f2	FSUB	f3,  f19, f3#endif#ifdef LN	LFD	f16, 15 * SIZE(AO)	LFD	f17, 14 * SIZE(AO)	LFD	f18, 13 * SIZE(AO)	LFD	f19, 12 * SIZE(AO)	FMUL	f3,  f16, f3	FNMSUB	f2,  f17, f3,  f2	FNMSUB	f1,  f18, f3,  f1	FNMSUB	f0,  f19, f3,  f0	LFD	f16, 10 * SIZE(AO)	LFD	f17,  9 * SIZE(AO)	LFD	f18,  8 * SIZE(AO)	LFD	f19,  5 * SIZE(AO)	LFD	f20,  4 * SIZE(AO)	LFD	f21,  0 * SIZE(AO)	FMUL	f2,  f16, f2	FNMSUB	f1,  f17, f2,  f1	FNMSUB	f0,  f18, f2,  f0	FMUL	f1,  f19, f1	FNMSUB	f0,  f20, f1,  f0	FMUL	f0,  f21, f0#endif#ifdef LT	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	FMUL	f0,  f16, f0	FNMSUB	f1,  f17, f0,  f1	FNMSUB	f2,  f18, f0,  f2	FNMSUB	f3,  f19, f0,  f3	LFD	f17,  5 * SIZE(AO)	LFD	f18,  6 * SIZE(AO)	LFD	f19,  7 * SIZE(AO)	FMUL	f1,  f17, f1	FNMSUB	f2,  f18, f1,  f2	FNMSUB	f3,  f19, f1,  f3	LFD	f18, 10 * SIZE(AO)	LFD	f19, 11 * SIZE(AO)	FMUL	f2,  f18, f2	FNMSUB	f3,  f19, f2,  f3	LFD	f19, 15 * SIZE(AO)	FMUL	f3,  f19, f3#endif#ifdef RN	LFD	f16,  0 * SIZE(BO)	FMUL	f0,  f16, f0	FMUL	f1,  f16, f1	FMUL	f2,  f16, f2	FMUL	f3,  f16, f3#endif#ifdef RT	LFD	f21,  0 * SIZE(BO)	FMUL	f0,  f21, f0	FMUL	
f1,  f21, f1	FMUL	f2,  f21, f2	FMUL	f3,  f21, f3#endif#ifdef LN	subi	CO1, CO1, 4 * SIZE#endif#if defined(LN) || defined(LT)	STFD	f0,   0 * SIZE(BO)	STFD	f1,   1 * SIZE(BO)	STFD	f2,   2 * SIZE(BO)	STFD	f3,   3 * SIZE(BO)#else	STFD	f0,   0 * SIZE(AO)	STFD	f1,   1 * SIZE(AO)	STFD	f2,   2 * SIZE(AO)	STFD	f3,   3 * SIZE(AO)#endif	STFD	f0,   0 * SIZE(CO1)	STFD	f1,   1 * SIZE(CO1)	STFD	f2,   2 * SIZE(CO1)	STFD	f3,   3 * SIZE(CO1)	lfs	f0,  FZERO 	fmr	f1,  f0	fmr	f2,  f0	fmr	f3,  f0#ifndef LN	addi	CO1, CO1, 4 * SIZE#endif#ifdef RT	slwi	r0, K, 2 + BASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	r0,   TEMP, 2 + BASE_SHIFT	slwi	TEMP, TEMP, 0 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LN	subi	KK, KK, 4#endif#ifdef LT	addi	KK, KK, 4#endif	addic.	I, I, -1	bgt+	LL(71)	.align 4LL(80):	andi.	I,  M,  2	ble	LL(90)#if defined(LT) || defined(RN)	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(B)	LFD	f21,  1 * SIZE(B)	LFD	f22,  2 * SIZE(B)	LFD	f23,  3 * SIZE(B)	srawi.	r0, KK,  2	mtspr	CTR, r0	mr	BO,  B#else#ifdef LN	slwi	r0,   K,  1 + BASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0,   KK, 1 + BASE_SHIFT	slwi	TEMP, KK, 0 + BASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(BO)	LFD	f21,  1 * SIZE(BO)	LFD	f22,  2 * SIZE(BO)	LFD	f23,  3 * SIZE(BO)	srawi.	
r0, TEMP,  2	mtspr	CTR, r0#endif	ble	LL(85)	.align 5LL(82):	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f20, f1	FMADD	f2,  f18, f21, f2	FMADD	f3,  f19, f21, f3	LFD	f16,  4 * SIZE(AO)	LFD	f17,  5 * SIZE(AO)	LFD	f18,  6 * SIZE(AO)	LFD	f19,  7 * SIZE(AO)	FMADD	f0,  f16, f22, f0	FMADD	f1,  f17, f22, f1	FMADD	f2,  f18, f23, f2	FMADD	f3,  f19, f23, f3	LFD	f16,  8 * SIZE(AO)	LFD	f17,  9 * SIZE(AO)	LFD	f18, 10 * SIZE(AO)	LFD	f19, 11 * SIZE(AO)	LFD	f20,  4 * SIZE(BO)	LFD	f21,  5 * SIZE(BO)	LFD	f22,  6 * SIZE(BO)	LFD	f23,  7 * SIZE(BO)	addi	AO, AO,  8 * SIZE	addi	BO, BO,  4 * SIZE	PREFETCH_B	bdnz	LL(82)	.align 4LL(85):#if defined(LT) || defined(RN)	andi.	r0, KK,  3#else	andi.	r0, TEMP, 3#endif	mtspr	CTR, r0	ble+	LL(88)	.align 4LL(86):	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f20, f1	LFD	f16,  2 * SIZE(AO)	LFD	f17,  3 * SIZE(AO)	LFD	f20,  1 * SIZE(BO)	addi	BO, BO,  1 * SIZE	addi	AO, AO,  2 * SIZE	bdnz	LL(86)	.align 4LL(88):	FADD	f0, f2, f0	FADD	f1, f3, f1#if defined(LN) || defined(RT)#ifdef LN	subi	r0, KK, 2#else	subi	r0, KK, 1#endif	slwi	TEMP, r0, 1 + BASE_SHIFT	slwi	r0,   r0, 0 + BASE_SHIFT	add	AO, AORIG, TEMP	add	BO, B,     r0#endif#if defined(LN) || defined(LT)	LFD	f16,  0 * SIZE(BO)	LFD	f20,  1 * SIZE(BO)	FSUB	f0,  f16, f0	FSUB	f1,  f20, f1#else	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	FSUB	f0,  f16, f0	FSUB	f1,  f17, f1#endif#ifdef LN	LFD	f19,  3 * SIZE(AO)	LFD	f20,  2 * SIZE(AO)	LFD	f21,  0 * SIZE(AO)	FMUL	f1,  f19, f1	FNMSUB	f0,  f20, f1,  f0	FMUL	f0,  f21, f0#endif#ifdef LT	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	FMUL	f0,  f16, f0	FNMSUB	f1,  f17, f0,  f1	LFD	f17,  3 * SIZE(AO)	FMUL	f1,  f17, f1#endif#ifdef RN	LFD	f16,  0 * SIZE(BO)	FMUL	f0,  f16, f0	FMUL	f1,  f16, f1#endif#ifdef RT	LFD	f21,  0 * SIZE(BO)	FMUL	f0,  f21, f0	FMUL	f1,  f21, f1#endif#ifdef LN	subi	CO1, CO1, 2 * SIZE#endif#if defined(LN) || defined(LT)	STFD	f0,   0 * SIZE(BO)	STFD	f1,   1 * SIZE(BO)#else	STFD	f0,   0 * SIZE(AO)	STFD	f1,   1 * SIZE(AO)#endif	STFD	f0,   0 * SIZE(CO1)	STFD	f1,   1 * 
SIZE(CO1)	lfs	f0,  FZERO 	fmr	f1,  f0	fmr	f2,  f0	fmr	f3,  f0#ifndef LN	addi	CO1, CO1, 2 * SIZE#endif#ifdef RT	slwi	r0, K, 1 + BASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	r0,   TEMP, 1 + BASE_SHIFT	slwi	TEMP, TEMP, 0 + BASE_SHIFT	add	AO, AO, r0	add	BO, BO, TEMP#endif#ifdef LN	subi	KK, KK, 2#endif#ifdef LT	addi	KK, KK, 2#endif	.align 4LL(90):	andi.	I,  M,  1	ble	LL(99)#if defined(LT) || defined(RN)	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(B)	LFD	f21,  1 * SIZE(B)	LFD	f22,  2 * SIZE(B)	LFD	f23,  3 * SIZE(B)	srawi.	r0, KK,  3	mtspr	CTR, r0	mr	BO,  B#else#ifdef LN	slwi	r0,   K,  BASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0,   KK, 0 + BASE_SHIFT	slwi	TEMP, KK, 0 + BASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     TEMP	sub	TEMP, K, KK	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(BO)	LFD	f21,  1 * SIZE(BO)	LFD	f22,  2 * SIZE(BO)	LFD	f23,  3 * SIZE(BO)	srawi.	r0, TEMP,  3	mtspr	CTR, r0#endif	ble	LL(95)	.align 5LL(92):	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f21, f1	FMADD	f2,  f18, f22, f2	FMADD	f3,  f19, f23, f3	LFD	f16,  4 * SIZE(AO)	LFD	f17,  5 * SIZE(AO)	LFD	f18,  6 * SIZE(AO)	LFD	f19,  7 * SIZE(AO)	LFD	f20,  4 * SIZE(BO)	LFD	f21,  5 * SIZE(BO)	LFD	f22,  6 * SIZE(BO)	LFD	f23,  7 * SIZE(BO)	FMADD	f0,  f16, f20, f0	FMADD	f1,  f17, f21, f1	FMADD	f2,  f18, f22, f2	FMADD	f3,  f19, f23, f3	LFD	f16,  8 * SIZE(AO)	LFD	f17,  9 * SIZE(AO)	LFD	f18, 10 * SIZE(AO)	LFD	f19, 11 * SIZE(AO)	LFD	f20,  8 * SIZE(BO)	LFD	f21,  9 * SIZE(BO)	LFD	f22, 10 * SIZE(BO)	LFD	f23, 11 * SIZE(BO)	addi	AO, AO,  8 * SIZE	addi	BO, BO,  8 * SIZE	bdnz	LL(92)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -