⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ztrsm_kernel_ppc440_ln.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	LFD	B3,  2 * SIZE(BO)	LFD	B4,  3 * SIZE(BO)	LFD	B5,  4 * SIZE(BO)	LFD	B6,  8 * SIZE(BO)	LFD	B7, 12 * SIZE(BO)	lfs	f0,  FZERO 	fmr	f1,  f0	fmr	f2,  f0	fmr	f3,  f0	fmr	f4,  f0	fmr	f5,  f0	fmr	f6,  f0	fmr	f7,  f0	fmr	f8,  f0	fmr	f9,  f0	fmr	f10, f0	fmr	f11, f0	fmr	f12, f0	fmr	f13, f0	fmr	f14, f0	fmr	f15, f0	srawi.	r0, TEMP,  2	mtspr	CTR, r0#endif	ble	.L15	.align 4.L12:	FMADD	f0,  A1, B1, f0	LFD	A3,  2 * SIZE(AO)	FMADD	f4,  A1, B2, f4	LFD	A6, 12 * SIZE(AO)	FMADD	f8,  A1, B3, f8	nop	FMADD	f12, A1, B4, f12	nop	FMADD	f1,  A2, B1, f1	LFD	A1,  3 * SIZE(AO)	FMADD	f5,  A2, B2, f5	nop	FMADD	f9,  A2, B3, f9	nop	FMADD	f13, A2, B4, f13	nop	FMADD	f2,  A3, B1, f2	nop	FMADD	f6,  A3, B2, f6	LFD	B8,  5 * SIZE(BO)	FMADD	f10, A3, B3, f10	LFD	B9,  6 * SIZE(BO)	FMADD	f14, A3, B4, f14	LFD	B10, 7 * SIZE(BO)	FMADD	f3,  A1, B1, f3	LFD	A2,  5 * SIZE(AO)	FMADD	f7,  A1, B2, f7	LFD	B1, 16 * SIZE(BO)	FMADD	f11, A1, B3, f11	nop	FMADD	f15, A1, B4, f15	nop	FMADD	f0,  A4, B5, f0 	LFD	A3,  6 * SIZE(AO)	FMADD	f4,  A4, B8, f4	LFD	A1, 16 * SIZE(AO)	FMADD	f8,  A4, B9, f8	nop	FMADD	f12, A4, B10, f12	nop	FMADD	f1,  A2, B5, f1	LFD	A4,  7 * SIZE(AO)	FMADD	f5,  A2, B8, f5	nop	FMADD	f9,  A2, B9, f9	nop	FMADD	f13, A2, B10, f13	nop	FMADD	f2,  A3, B5, f2	nop	FMADD	f6,  A3, B8, f6	LFD	B2,  9 * SIZE(BO)	FMADD	f10, A3, B9, f10	LFD	B3, 10 * SIZE(BO)	FMADD	f14, A3, B10, f14	LFD	B4, 11 * SIZE(BO)	FMADD	f3,  A4, B5, f3	LFD	A2,  9 * SIZE(AO)	FMADD	f7,  A4, B8, f7	LFD	B5, 20 * SIZE(BO)	FMADD	f11, A4, B9, f11	nop	FMADD	f15, A4, B10, f15	nop	FMADD	f0,  A5, B6, f0	LFD	A3, 10 * SIZE(AO)	FMADD	f4,  A5, B2, f4	LFD	A4, 20 * SIZE(AO)	FMADD	f8,  A5, B3, f8	nop	FMADD	f12, A5, B4, f12	nop	FMADD	f1,  A2, B6, f1	LFD	A5, 11 * SIZE(AO)	FMADD	f5,  A2, B2, f5	nop	FMADD	f9,  A2, B3, f9	nop	FMADD	f13, A2, B4, f13	nop	FMADD	f2,  A3, B6, f2	nop	FMADD	f6,  A3, B2, f6	LFD	B8, 13 * SIZE(BO)	FMADD	f10, A3, B3, f10	LFD	B9, 14 * SIZE(BO)	FMADD	f14, A3, B4, f14	LFD	B10,15 * SIZE(BO)	FMADD	f3,  A5, B6, f3	LFD	A2, 13 * SIZE(AO)	FMADD	f7,  A5, B2, f7	LFD	B6, 24 * SIZE(BO)	FMADD	f11, A5, B3, f11	nop	FMADD	f15, A5, B4, f15	nop	FMADD	f0,  A6, B7, f0	LFD	A3, 14 * SIZE(AO)	FMADD	f4,  A6, B8, f4	LFD	A5, 24 * SIZE(AO)	FMADD	f8,  A6, B9, f8	nop	FMADD	f12, A6, B10, f12	nop	FMADD	f1,  A2, B7, f1	LFD	A6, 15 * SIZE(AO)	FMADD	f5,  A2, B8, f5	nop	FMADD	f9,  A2, B9, f9	nop	FMADD	f13, A2, B10, f13	nop	FMADD	f2,  A3, B7, f2	addi	AO, AO, 16 * SIZE	FMADD	f6,  A3, B8, f6	LFD	B2, 17 * SIZE(BO)	FMADD	f10, A3, B9, f10	LFD	B3, 18 * SIZE(BO)	FMADD	f14, A3, B10, f14	LFD	B4, 19 * SIZE(BO)	FMADD	f3,  A6, B7, f3	LFD	A2,  1 * SIZE(AO)	FMADD	f7,  A6, B8, f7	LFD	B7, 28 * SIZE(BO)	FMADD	f11, A6, B9, f11	addi	BO, BO, 16 * SIZE	FMADD	f15, A6, B10, f15	bdnz	.L12	.align 4.L15:#if defined(LT) || defined(RN)	andi.	r0, KK,  3#else	andi.	r0, TEMP, 3#endif	mtspr	CTR, r0	ble	.LKERNEL_MainFinish	.align 4.L16:	FMADD	f0,  A1, B1, f0	LFD	A3,  2 * SIZE(AO)	FMADD	f4,  A1, B2, f4	FMADD	f8,  A1, B3, f8	FMADD	f12, A1, B4, f12	LFD	A4,  3 * SIZE(AO)	FMADD	f1,  A2, B1, f1	FMADD	f5,  A2, B2, f5	FMADD	f9,  A2, B3, f9	FMADD	f13, A2, B4, f13	LFDU	A1,  4 * SIZE(AO)	FMADD	f2,  A3, B1, f2	FMADD	f6,  A3, B2, f6	FMADD	f10, A3, B3, f10	FMADD	f14, A3, B4, f14	LFD	A2,  1 * SIZE(AO)	FMADD	f3,  A4, B1, f3	LFDU	B1,  4 * SIZE(BO)	FMADD	f7,  A4, B2, f7	LFD	B2,  1 * SIZE(BO)	FMADD	f11, A4, B3, f11	LFD	B3,  2 * SIZE(BO)	FMADD	f15, A4, B4, f15	LFD	B4,  3 * SIZE(BO)	bdnz	.L16	.align 4.LKERNEL_MainFinish:#ifndef CONJ	FSUB	  f0,  f0,  f5	FADD	  f1,  f1,  f4	FSUB	  f2,  f2,  f7	FADD	  f3,  f3,  f6	FSUB	  f8,  f8,  f13	FADD	  f9,  f9,  f12	FSUB	  f10, f10, f15	FADD	  f11, f11, f14#else	FADD	  f0,  f0,  f5	FSUB	  f1,  f4,  f1	FADD	  f2,  f2,  f7	FSUB	  f3,  f6,  f3	FADD	  f8,  f8,  f13	FSUB	  f9,  f12, f9	FADD	  f10, f10, f15	FSUB	  f11, f14, f11#endif#if defined(LN) || defined(RT)	subi	r0, KK, 2	slwi	r0, r0, 1 + ZBASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     r0#endif#if defined(LN) || defined(LT)	LFD	f16,  0 * SIZE(BO)	LFD	f17,  1 * SIZE(BO)	LFD	f18,  2 * SIZE(BO)	LFD	f19,  3 * SIZE(BO) 	LFD	f20,  4 * SIZE(BO) 	LFD	f21,  5 * SIZE(BO)	LFD	f22,  6 * SIZE(BO)	LFD	f23,  7 * SIZE(BO)	FSUB	f0,  f16, f0	FSUB	f1,  f17, f1	FSUB	f8,  f18, f8	FSUB	f9,  f19, f9	FSUB	f2,  f20, f2	FSUB	f3,  f21, f3	FSUB	f10, f22, f10	FSUB	f11, f23, f11#else	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO) 	LFD	f20,  4 * SIZE(AO) 	LFD	f21,  5 * SIZE(AO)	LFD	f22,  6 * SIZE(AO)	LFD	f23,  7 * SIZE(AO)#ifndef CONJ	FSUB	f0,  f16, f0	FSUB	f1,  f17, f1	FSUB	f2,  f18, f2	FSUB	f3,  f19, f3	FSUB	f8,  f20, f8	FSUB	f9,  f21, f9	FSUB	f10, f22, f10	FSUB	f11, f23, f11#else	FSUB	f0,  f16, f0	FADD	f1,  f17, f1	FSUB	f2,  f18, f2	FADD	f3,  f19, f3	FSUB	f8,  f20, f8	FADD	f9,  f21, f9	FSUB	f10, f22, f10	FADD	f11, f23, f11#endif#endif#ifdef LN	LFD	f16,  6 * SIZE(AO)	LFD	f17,  7 * SIZE(AO)	LFD	f18,  4 * SIZE(AO)	LFD	f19,  5 * SIZE(AO)	LFD	f20,  0 * SIZE(AO)	LFD	f21,  1 * SIZE(AO)	FMUL	f6,  f17, f3	FMUL	f7,  f17, f2	FMUL	f14, f17, f11	FMUL	f15, f17, f10#ifndef CONJ	FMSUB	f2,  f16, f2,  f6	FMADD	f3,  f16, f3,  f7	FMSUB	f10, f16, f10, f14	FMADD	f11, f16, f11, f15	FMADD	f0,  f19, f3,  f0	FNMSUB	f1,  f19, f2,  f1	FMADD	f8,  f19, f11, f8	FNMSUB	f9,  f19, f10, f9	FNMSUB	f0,  f18, f2,  f0	FNMSUB	f1,  f18, f3,  f1	FNMSUB	f8,  f18, f10, f8	FNMSUB	f9,  f18, f11, f9	FMUL	f4,  f21, f1	FMUL	f5,  f21, f0	FMUL	f12, f21, f9	FMUL	f13, f21, f8	FMSUB	f0,  f20, f0,  f4	FMADD	f1,  f20, f1,  f5	FMSUB	f8,  f20, f8,  f12	FMADD	f9,  f20, f9,  f13#else	FMADD	f2,  f16, f2,  f6	FMSUB	f3,  f16, f3,  f7	FMADD	f10, f16, f10, f14	FMSUB	f11, f16, f11, f15	FMSUB	f0,  f19, f3,  f0	FNMADD	f1,  f19, f2,  f1	FMSUB	f8,  f19, f11, f8	FNMADD	f9,  f19, f10, f9	FNMADD	f0,  f18, f2,  f0	FNMADD	f1,  f18, f3,  f1	FNMADD	f8,  f18, f10, f8	FNMADD	f9,  f18, f11, f9	FMUL	f4,  f21, f1	FMUL	f5,  f21, f0	FMUL	f12, f21, f9	FMUL	f13, f21, f8	FMADD	f0,  f20, f0,  f4	FMSUB	f1,  f20, f1,  f5	FMADD	f8,  f20, f8,  f12	FMSUB	f9,  f20, f9,  f13#endif#endif#ifdef LT	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  6 * SIZE(AO)	LFD	f21,  7 * SIZE(AO)	FMUL	f4,  f17, f1	FMUL	f5,  f17, f0	FMUL	f12, f17, f9	FMUL	f13, f17, f8#ifndef CONJ	FMSUB	f0,  f16, f0,  f4	FMADD	f1,  f16, f1,  f5	FMSUB	f8,  f16, f8,  f12	FMADD	f9,  f16, f9,  f13	FMADD	f2,  f19, f1, f2	FNMSUB	f3,  f19, f0, f3	FMADD	f10, f19, f9, f10	FNMSUB	f11, f19, f8, f11	FNMSUB	f2,  f18, f0,  f2	FNMSUB	f3,  f18, f1,  f3	FNMSUB	f10, f18, f8,  f10	FNMSUB	f11, f18, f9,  f11	FMUL	f4,  f21, f3	FMUL	f5,  f21, f2	FMUL	f12, f21, f11	FMUL	f13, f21, f10	FMSUB	f2,  f20, f2,  f4	FMADD	f3,  f20, f3,  f5	FMSUB	f10, f20, f10, f12	FMADD	f11, f20, f11, f13#else	FMADD	f0,  f16, f0,  f4	FMSUB	f1,  f16, f1,  f5	FMADD	f8,  f16, f8,  f12	FMSUB	f9,  f16, f9,  f13	FMSUB	f2,  f19, f1, f2	FNMADD	f3,  f19, f0, f3	FMSUB	f10, f19, f9, f10	FNMADD	f11, f19, f8, f11	FNMADD	f2,  f18, f0,  f2	FNMADD	f3,  f18, f1,  f3	FNMADD	f10, f18, f8,  f10	FNMADD	f11, f18, f9,  f11	FMUL	f4,  f21, f3	FMUL	f5,  f21, f2	FMUL	f12, f21, f11	FMUL	f13, f21, f10	FMADD	f2,  f20, f2,  f4	FMSUB	f3,  f20, f3,  f5	FMADD	f10, f20, f10, f12	FMSUB	f11, f20, f11, f13#endif#endif#ifdef RN	LFD	f16,  0 * SIZE(BO)	LFD	f17,  1 * SIZE(BO)	LFD	f18,  2 * SIZE(BO)	LFD	f19,  3 * SIZE(BO)	LFD	f20,  6 * SIZE(BO)	LFD	f21,  7 * SIZE(BO)	FMUL	f4,  f17, f1	FMUL	f5,  f17, f0	FMUL	f6,  f17, f3	FMUL	f7,  f17, f2#ifndef CONJ	FMSUB	f0,  f16, f0,  f4	FMADD	f1,  f16, f1,  f5	FMSUB	f2,  f16, f2,  f6	FMADD	f3,  f16, f3,  f7	FMADD	f8,  f19, f1, f8	FNMSUB	f9,  f19, f0, f9	FMADD	f10, f19, f3, f10	FNMSUB	f11, f19, f2, f11	FNMSUB	f8,  f18, f0,  f8	FNMSUB	f9,  f18, f1,  f9	FNMSUB	f10, f18, f2,  f10	FNMSUB	f11, f18, f3,  f11	FMUL	f4,  f21, f9	FMUL	f5,  f21, f8	FMUL	f6,  f21, f11	FMUL	f7,  f21, f10	FMSUB	f8,  f20, f8,  f4	FMADD	f9,  f20, f9,  f5	FMSUB	f10, f20, f10, f6	FMADD	f11, f20, f11, f7#else	FMADD	f0,  f16, f0,  f4	FMSUB	f1,  f16, f1,  f5	FMADD	f2,  f16, f2,  f6	FMSUB	f3,  f16, f3,  f7	FMSUB	f8,  f19, f1, f8	FNMADD	f9,  f19, f0, f9	FMSUB	f10, f19, f3, f10	FNMADD	f11, f19, f2, f11	FNMADD	f8,  f18, f0,  f8	FNMADD	f9,  f18, f1,  f9	FNMADD	f10, f18, f2,  f10	FNMADD	f11, f18, f3,  f11	FMUL	f4,  f21, f9	FMUL	f5,  f21, f8	FMUL	f6,  f21, f11	FMUL	f7,  f21, f10	FMADD	f8,  f20, f8,  f4	FMSUB	f9,  f20, f9,  f5	FMADD	f10, f20, f10, f6	FMSUB	f11, f20, f11, f7#endif#endif#ifdef RT	LFD	f16,  6 * SIZE(BO)	LFD	f17,  7 * SIZE(BO)	LFD	f18,  4 * SIZE(BO)	LFD	f19,  5 * SIZE(BO)	LFD	f20,  0 * SIZE(BO)	LFD	f21,  1 * SIZE(BO)	FMUL	f12, f17, f9	FMUL	f13, f17, f8	FMUL	f14, f17, f11	FMUL	f15, f17, f10#ifndef CONJ	FMSUB	f8,  f16, f8,  f12	FMADD	f9,  f16, f9,  f13	FMSUB	f10, f16, f10, f14	FMADD	f11, f16, f11, f15	FMADD	f0,  f19, f9,  f0	FNMSUB	f1,  f19, f8,  f1	FMADD	f2,  f19, f11, f2	FNMSUB	f3,  f19, f10, f3	FNMSUB	f0,  f18, f8,  f0	FNMSUB	f1,  f18, f9,  f1	FNMSUB	f2,  f18, f10, f2	FNMSUB	f3,  f18, f11, f3	FMUL	f4,  f21, f1	FMUL	f5,  f21, f0	FMUL	f6,  f21, f3	FMUL	f7,  f21, f2	FMSUB	f0,  f20, f0,  f4	FMADD	f1,  f20, f1,  f5	FMSUB	f2,  f20, f2,  f6	FMADD	f3,  f20, f3,  f7#else	FMADD	f8,  f16, f8,  f12	FMSUB	f9,  f16, f9,  f13	FMADD	f10, f16, f10, f14	FMSUB	f11, f16, f11, f15	FMSUB	f0,  f19, f9,  f0	FNMADD	f1,  f19, f8,  f1	FMSUB	f2,  f19, f11, f2	FNMADD	f3,  f19, f10, f3	FNMADD	f0,  f18, f8,  f0	FNMADD	f1,  f18, f9,  f1	FNMADD	f2,  f18, f10, f2	FNMADD	f3,  f18, f11, f3	FMUL	f4,  f21, f1	FMUL	f5,  f21, f0	FMUL	f6,  f21, f3	FMUL	f7,  f21, f2	FMADD	f0,  f20, f0,  f4	FMSUB	f1,  f20, f1,  f5	FMADD	f2,  f20, f2,  f6	FMSUB	f3,  f20, f3,  f7#endif#endif#ifdef LN	subi	CO1, CO1, 4 * SIZE	subi	CO2, CO2, 4 * SIZE#endif#if defined(LN) || defined(LT)	STFD	f0,   0 * SIZE(BO)	STFD	f1,   1 * SIZE(BO)	STFD	f8,   2 * SIZE(BO)	STFD	f9,   3 * SIZE(BO)	STFD	f2,   4 * SIZE(BO)	STFD	f3,   5 * SIZE(BO)	STFD	f10,  6 * SIZE(BO)	STFD	f11,  7 * SIZE(BO)#else	STFD	f0,   0 * SIZE(AO)	STFD	f1,   1 * SIZE(AO)	STFD	f2,   2 * SIZE(AO)	STFD	f3,   3 * SIZE(AO)	STFD	f8,   4 * SIZE(AO)	STFD	f9,   5 * SIZE(AO)	STFD	f10,  6 * SIZE(AO)	STFD	f11,  7 * SIZE(AO)#endif	STFD	f0,   0 * SIZE(CO1)	STFD	f1,   1 * SIZE(CO1)	STFD	f2,   2 * SIZE(CO1)	STFD	f3,   3 * SIZE(CO1)	STFD	f8,   0 * SIZE(CO2)	STFD	f9,   1 * SIZE(CO2)	STFD	f10,  2 * SIZE(CO2)	STFD	f11,  3 * SIZE(CO2)#ifndef LN	addi	CO1, CO1, 4 * SIZE	addi	CO2, CO2, 4 * SIZE#endif	#ifdef RT	slwi	r0, K, 1 + ZBASE_SHIFT	add	AORIG, AORIG, r0#endif#if defined(LT) || defined(RN)	sub	TEMP, K, KK	slwi	TEMP, TEMP, 1 + ZBASE_SHIFT	add	AO, AO, TEMP	add	BO, BO, TEMP#endif#ifdef LT	addi	KK, KK, 2#endif#ifdef LN	subi	KK, KK, 2#endif	addic.	I, I, -1	bgt	.L11	.align 4.L29:#ifdef LN	slwi	r0, K, 1 + ZBASE_SHIFT	add	B, B, r0#endif#if defined(LT) || defined(RN)	mr	B,  BO#endif#ifdef RN	addi	KK, KK, 2#endif#ifdef RT	subi	KK, KK, 2#endif	addic.	J, J, -1	bgt	.L10	.align 4.L30:	andi.	J, N,  1	ble	.L999#ifdef RT	slwi	r0, K, 0 + ZBASE_SHIFT	sub	B, B, r0	sub	C, C, LDC#endif	mr	CO1, C#ifdef LN	add	KK, M, OFFSET#endif#ifdef LT	mr	KK, OFFSET#endif#if defined(LN) || defined(RT)	mr	AORIG, A#else	mr	AO, A#endif#ifndef RT	add	C, C, LDC#endif	andi.	I,  M,  1	ble	.L40#if defined(LT) || defined(RN)	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(B)	LFD	f21,  1 * SIZE(B)	LFD	f22,  2 * SIZE(B)	LFD	f23,  3 * SIZE(B)	lfs	f0, FZERO	fmr	f1, f0	fmr	f2, f0	fmr	f3, f0	fmr	f4, f0	fmr	f5, f0	fmr	f6, f0	fmr	f7, f0	srawi.	r0, KK,  2	mr	BO,  B	mtspr	CTR, r0#else#ifdef LN	slwi	r0,   K,  0 + ZBASE_SHIFT	sub	AORIG, AORIG, r0#endif	slwi	r0,   KK, 0 + ZBASE_SHIFT	add	AO, AORIG, r0	add	BO, B,     r0	sub	TEMP, K, KK	LFD	f16,  0 * SIZE(AO)	LFD	f17,  1 * SIZE(AO)	LFD	f18,  2 * SIZE(AO)	LFD	f19,  3 * SIZE(AO)	LFD	f20,  0 * SIZE(BO)	LFD	f21,  1 * SIZE(BO)	LFD	f22,  2 * SIZE(BO)	LFD	f23,  3 * SIZE(BO)	lfs	f0, FZERO	fmr	f1, f0	fmr	f2, f0	fmr	f3, f0	fmr	f4, f0	fmr	f5, f0	fmr	f6, f0	fmr	f7, f0

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -