⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_t_ppc440.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	LFDU	a1,  1 * SIZE(AO1)	FMADD	y06, a6, b2, y06	LFDU	a2,  1 * SIZE(AO2)	FMADD	y07, a7, b2, y07	LFDU	a3,  1 * SIZE(AO3)	FMADD	y08, a8, b2, y08	LFDU	a4,  1 * SIZE(AO4)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b3, y01	LFDU	a5,  1 * SIZE(AO1)	FMADD	y02, a2, b3, y02	LFDU	a6,  1 * SIZE(AO2)	FMADD	y03, a3, b3, y03	LFDU	a7,  1 * SIZE(AO3)	FMADD	y04, a4, b3, y04	LFDU	a8,  1 * SIZE(AO4)	FMADD	y05, a5, b4, y05	FMADD	y06, a6, b4, y06	FMADD	y07, a7, b4, y07	FMADD	y08, a8, b4, y08	.align 4LL(26):	andi.	r0, M,  2	ble	LL(27)	LFDU	b1,  1 * SIZE(X1)	LFDU	a1,  1 * SIZE(AO1)	LFDU	a2,  1 * SIZE(AO2)	LFDU	a3,  1 * SIZE(AO3)	LFDU	a4,  1 * SIZE(AO4)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a5,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a6,  1 * SIZE(AO2)	FMADD	y03, a3, b1, y03	LFDU	a7,  1 * SIZE(AO3)	FMADD	y04, a4, b1, y04	LFDU	a8,  1 * SIZE(AO4)	FMADD	y05, a5, b2, y05	FMADD	y06, a6, b2, y06	FMADD	y07, a7, b2, y07	FMADD	y08, a8, b2, y08	.align 4LL(27):	andi.	r0, M,  1	ble	LL(28)	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	LFDU	a2,  1 * SIZE(AO2)	LFDU	a3,  1 * SIZE(AO3)	LFDU	a4,  1 * SIZE(AO4)	FMADD	y01, a1, b1, y01	FMADD	y02, a2, b1, y02	FMADD	y03, a3, b1, y03	FMADD	y04, a4, b1, y04	.align 4LL(28):	lfd	alpha, ALPHA	LFDUX	a1, Y, INCY	LFDUX	a2, Y, INCY	LFDUX	a3, Y, INCY	LFDUX	a4, Y, INCY	FADD	y01, y05, y01	FADD	y02, y06, y02	FADD	y03, y07, y03	FADD	y04, y08, y04	FMADD	a1, alpha, f0, a1	FMADD	a2, alpha, f1, a2	FMADD	a3, alpha, f2, a3	FMADD	a4, alpha, f3, a4	STFDUX	a1, YY, INCY	addi	J, J, -1	STFDUX	a2, YY, INCY	cmpi	cr0, 0, J, 0	STFDUX	a3, YY, INCY	STFDUX	a4, YY, INCY	bgt	LL(21)	.align 4LL(30):	andi.	J, N, 2	ble	LL(40)	mr     AO1, A	add    AO2, A,   LDA	add    A,   AO2, LDA	mr     X1, XP	lfd	 y01, FZERO	fmr	 y02, y01	fmr	 y03, y01	fmr	 y04, y01	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	LL(34)	LFDU	a1,  1 * SIZE(AO1)	LFDU	a2,  1 * SIZE(AO2)	LFDU	b1,  1 * SIZE(X1)	LFDU	b2,  1 * SIZE(X1)	LFDU	a5,  1 * SIZE(AO1)	LFDU	a6,  1 * SIZE(AO2)	LFDU	b3,  1 * SIZE(X1)	LFDU	b4,  1 * SIZE(X1)	bdz	LL(33)	.align 4LL(32):#ifdef PPCG4	dcbt	X1, PREA#endif	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a2,  1 * SIZE(AO2)	LFDU	b1,  1 * SIZE(X1)#ifdef PPCG4	dcbt	AO1, PREA#endif	FMADD	y03, a5, b2, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b2, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b3, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b3, y02	LFDU	a2,  1 * SIZE(AO2)	LFDU	b3,  1 * SIZE(X1)#ifdef PPCG4	dcbt	AO2, PREA#endif	FMADD	y03, a5, b4, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b4, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a2,  1 * SIZE(AO2)#if defined(PPCG4) && defined(DOUBLE)	dcbt	X1, PREA#endif	LFDU	b1,  1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE)	dcbt	AO1, PREA#endif	FMADD	y03, a5, b2, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b2, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b3, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b3, y02	LFDU	a2,  1 * SIZE(AO2)	LFDU	b3,  1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE)	dcbt	AO2, PREA#endif	FMADD	y03, a5, b4, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b4, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b4,  1 * SIZE(X1) 	bdnz	LL(32)	.align 4	LL(33):	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a2,  1 * SIZE(AO2)	LFDU	b1,  1 * SIZE(X1)	FMADD	y03, a5, b2, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b2, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b3, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b3, y02	LFDU	a2,  1 * SIZE(AO2)	LFDU	b3,  1 * SIZE(X1)	FMADD	y03, a5, b4, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b4, y04	LFDU	a6,  1 * SIZE(AO2)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a2,  1 * SIZE(AO2)	FMADD	y03, a5, b2, y03	LFDU	a5,  1 * SIZE(AO1)	FMADD	y04, a6, b2, y04	LFDU	a6,  1 * SIZE(AO2)	FMADD	y01, a1, b3, y01	FMADD	y02, a2, b3, y02	FMADD	y03, a5, b4, y03	FMADD	y04, a6, b4, y04	.align 4LL(34):	andi.	r0, M,  7	ble	LL(38)	andi.	r0, M,  4	ble	LL(36)	LFDU	a1,  1 * SIZE(AO1)	LFDU	a2,  1 * SIZE(AO2)	LFDU	b1,  1 * SIZE(X1)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a5,  1 * SIZE(AO1)	FMADD	y02, a2, b1, y02	LFDU	a6,  1 * SIZE(AO2)	LFDU	b3,  1 * SIZE(X1)	FMADD	y03, a5, b2, y03	LFDU	a1,  1 * SIZE(AO1)	FMADD	y04, a6, b2, y04	LFDU	a2,  1 * SIZE(AO2)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b3, y01	LFDU	a5,  1 * SIZE(AO1)	FMADD	y02, a2, b3, y02	LFDU	a6,  1 * SIZE(AO2)	FMADD	y03, a5, b4, y03	FMADD	y04, a6, b4, y04	.align 4LL(36):	andi.	r0, M,  2	ble	LL(37)	LFDU	b1,  1 * SIZE(X1)	LFDU	a1,  1 * SIZE(AO1)	LFDU	a2,  1 * SIZE(AO2)	LFDU	b2,  1 * SIZE(X1)	LFDU	a3,  1 * SIZE(AO1)	LFDU	a4,  1 * SIZE(AO2)	FMADD	y01, a1, b1, y01	FMADD	y02, a2, b1, y02	FMADD	y03, a3, b2, y03	FMADD	y04, a4, b2, y04	.align 4LL(37):	andi.	r0, M,  1	ble	LL(38)	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	LFDU	a2,  1 * SIZE(AO2)	FMADD	y01, a1, b1, y01	FMADD	y02, a2, b1, y02	.align 4LL(38):	lfd	alpha, ALPHA	LFDUX	a1, Y, INCY	LFDUX	a2, Y, INCY	FADD	y01, y03, y01	FADD	y02, y04, y02	FMADD	a1, alpha, f0, a1	FMADD	a2, alpha, f1, a2	STFDUX	a1, YY, INCY	STFDUX	a2, YY, INCY	.align 4LL(40):	andi.	J, N, 1	ble	LL(999)	mr     AO1, A	add    A,   A, LDA	mr     X1, XP	lfd	 y01, FZERO	fmr	 y02, y01	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	LL(44)	LFDU	a1,  1 * SIZE(AO1)	LFDU	a2,  1 * SIZE(AO1)	LFDU	a3,  1 * SIZE(AO1)	LFDU	a4,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	LFDU	b2,  1 * SIZE(X1)	LFDU	b3,  1 * SIZE(X1)	LFDU	b4,  1 * SIZE(X1)	bdz	LL(43)	.align 4LL(42):	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)#ifdef PPCG4	dcbt	X1, PREA#endif	FMADD	y02, a2, b2, y02	LFDU	a2,  1 * SIZE(AO1)	LFDU	b2,  1 * SIZE(X1)#ifdef PPCG4	dcbt	AO1, PREA#endif	FMADD	y01, a3, b3, y01	LFDU	a3,  1 * SIZE(AO1)	LFDU	b3,  1 * SIZE(X1)	FMADD	y02, a4, b4, y02 	LFDU	a4,  1 * SIZE(AO1)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	FMADD	y02, a2, b2, y02	LFDU	a2,  1 * SIZE(AO1)	LFDU	b2,  1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE)	dcbt	AO1, PREA#endif	FMADD	y01, a3, b3, y01	LFDU	a3,  1 * SIZE(AO1)	LFDU	b3,  1 * SIZE(X1)#if defined(PPCG4) && defined(DOUBLE)	dcbt	X1, PREA#endif	FMADD	y02, a4, b4, y02 	LFDU	a4,  1 * SIZE(AO1)	LFDU	b4,  1 * SIZE(X1) 	bdnz	LL(42)	.align 4	LL(43):	FMADD	y01, a1, b1, y01	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	FMADD	y02, a2, b2, y02	LFDU	a2,  1 * SIZE(AO1)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a3, b3, y01	LFDU	a3,  1 * SIZE(AO1)	LFDU	b3,  1 * SIZE(X1)	FMADD	y02, a4, b4, y02 	LFDU	a4,  1 * SIZE(AO1)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	FMADD	y02, a2, b2, y02	FMADD	y01, a3, b3, y01	FMADD	y02, a4, b4, y02	.align 4LL(44):	andi.	r0, M,  7	ble	LL(48)	andi.	r0, M,  4	ble	LL(46)	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	LFDU	a2,  1 * SIZE(AO1)	LFDU	b2,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	LFDU	a3,  1 * SIZE(AO1)	LFDU	b3,  1 * SIZE(X1)	FMADD	y02, a2, b2, y02	LFDU	a4,  1 * SIZE(AO1)	LFDU	b4,  1 * SIZE(X1)	FMADD	y01, a3, b3, y01	FMADD	y02, a4, b4, y02	.align 4LL(46):	andi.	r0, M,  2	ble	LL(47)	LFDU	b1,  1 * SIZE(X1)	LFDU	a1,  1 * SIZE(AO1)	LFDU	b2,  1 * SIZE(X1)	LFDU	a2,  1 * SIZE(AO1)	FMADD	y01, a1, b1, y01	FMADD	y02, a2, b2, y02	.align 4LL(47):	andi.	r0, M,  1	ble	LL(48)	LFDU	a1,  1 * SIZE(AO1)	LFDU	b1,  1 * SIZE(X1)	FMADD	y01, a1, b1, y01	.align 4LL(48):	lfd	alpha, ALPHA	LFDUX	a1, Y, INCY	FADD	y01, y02, y01	FMADD	a1, alpha, f0, a1	STFDUX	a1, YY, INCY	.align 4LL(999):	li	r3, 0	lfd	f14,     0(SP)	lfd	f15,     8(SP)	lfd	f16,    16(SP)	lfd	f17,    24(SP)	lfd	f18,    32(SP)	lfd	f19,    40(SP)	lfd	f20,    48(SP)	lfd	f21,    56(SP)	lfd	f22,    64(SP)	lfd	f23,    72(SP)#ifdef __64BIT__	ld	r14,   160(SP)	ld	r15,   168(SP)	ld	r16,   176(SP)	ld	r17,   184(SP)	ld	r18,   192(SP)	ld	r19,   200(SP)	ld	r20,   208(SP)	ld	r21,   216(SP)	ld	r22,   224(SP)#else	lwz	r14,   160(SP)	lwz	r15,   164(SP)	lwz	r16,   168(SP)	lwz	r17,   172(SP)	lwz	r18,   176(SP)	lwz	r19,   180(SP)	lwz	r20,   184(SP)	lwz	r21,   188(SP)	lwz	r22,   192(SP)#endif	addi	SP, SP, STACKSIZE	blr	EPILOGUE#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -