⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_n_ppc440.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	/*
	 * Tail of the kernel: column remainders (N & 2, N & 1), the optional
	 * add-back of the contiguous work buffer into a strided Y, and the
	 * function epilogue.  One statement per line; preprocessor directives
	 * start in column 0 so cpp recognises them.
	 *
	 * Conventions visible in this excerpt:
	 *   - LFDU/STFDU are update-form accesses, so every pointer is advanced
	 *     by SIZE *before* the access.
	 *   - Y1 is the read cursor and Y2 the write cursor over the same
	 *     vector (both are set from YY); reads always run ahead of writes.
	 *   - `andi.` yields a non-negative result, so the `ble` that follows
	 *     it branches exactly when the tested bits are zero.
	 *   - NOTE(review): the upper-case mnemonics (LFDU, FMADD, ...) and the
	 *     names SIZE, LL(), PREA, ALPHA, BUFFER are defined outside this
	 *     excerpt -- presumably precision-selecting macros; confirm there.
	 */

	/*
	 * Fragment: end of a two-row step of the four-column pass.  Its label
	 * and loads precede the visible text.
	 * NOTE(review): a5..a8 presumably hold the AO3/AO4 elements -- confirm.
	 */
	FMADD	y01, alpha3, a5, y01
	FMADD	y02, alpha3, a6, y02
	FMADD	y01, alpha4, a7, y01
	FMADD	y02, alpha4, a8, y02

	STFDU	y01, 1 * SIZE(Y2)
	STFDU	y02, 1 * SIZE(Y2)
	.align 4

/* Four columns, last single row (M & 1). */
LL(28):
	andi.	r0, M, 1
	ble	LL(29)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO2)
	LFDU	a3,  1 * SIZE(AO3)
	LFDU	a4,  1 * SIZE(AO4)

	FMADD	y01, alpha1, a1, y01
	FMADD	y01, alpha2, a2, y01
	FMADD	y01, alpha3, a3, y01
	FMADD	y01, alpha4, a4, y01

	STFDU	y01, 1 * SIZE(Y2)
	.align 4

/*
 * Bottom of the four-column loop: decrement J and repeat from LL(21)
 * while J > 0.  alpha is reloaded from ALPHA first.
 * NOTE(review): presumably because alpha's register is reused inside the
 * pass -- confirm against the register #defines.
 */
LL(29):
	addi	J, J, -1
	lfd	alpha, ALPHA
	cmpi	cr0, 0, J, 0
	bgt	LL(21)
	.align 4

/*
 * Two-column remainder (N & 2).
 * alpha1/alpha2 = alpha * next two X elements (stride INCX);
 * AO1 = A, AO2 = A + LDA, and A is advanced past both columns.
 */
LL(30):
	andi.	J, N, 2
	ble	LL(40)

	LFDUX	alpha1, X, INCX
	LFDUX	alpha2, X, INCX

	mr	AO1, A
	add	AO2, A,   LDA
	add	A,   AO2, LDA

	FMUL	alpha1, alpha, alpha1
	mr	Y1, YY
	FMUL	alpha2, alpha, alpha2
	mr	Y2, YY

	/* CTR = M >> 3: number of 8-row groups; none -> remainders at LL(35). */
	srawi.	r0,  M, 3
	mtspr	CTR, r0
	ble	LL(35)

	/* Preload the first 8 y values and the first 8 elements of column AO1. */
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y03, 1 * SIZE(Y1)
	LFDU	a3,  1 * SIZE(AO1)
	LFDU	y04, 1 * SIZE(Y1)
	LFDU	a4,  1 * SIZE(AO1)

	LFDU	y05, 1 * SIZE(Y1)
	LFDU	a5,  1 * SIZE(AO1)
	LFDU	y06, 1 * SIZE(Y1)
	LFDU	a6,  1 * SIZE(AO1)
	LFDU	y07, 1 * SIZE(Y1)
	LFDU	a7,  1 * SIZE(AO1)
	LFDU	y08, 1 * SIZE(Y1)
	LFDU	a8,  1 * SIZE(AO1)

	/* Only one group: skip the pipelined loop and drain at LL(33). */
	bdz	LL(33)
	.align 4

/*
 * Pipelined 8-row x 2-column loop.  Results for the current group are
 * built in y09..y16 while a1..a8 / y01..y08 are refilled for the next
 * group, so the body must keep this exact interleaving.
 */
LL(32):
#ifdef PPCG4
	dcbtst	Y1, PREA
#endif

	/* Column 1 contribution, rows 0-3; a1..a4 are refilled from column 2. */
	FMADD	y09, alpha1, a1, y01
	LFDU	a1,  1 * SIZE(AO2)
	FMADD	y10, alpha1, a2, y02
	LFDU	a2,  1 * SIZE(AO2)
	FMADD	y11, alpha1, a3, y03
	LFDU	a3,  1 * SIZE(AO2)
	FMADD	y12, alpha1, a4, y04
	LFDU	a4,  1 * SIZE(AO2)

	LFDU	y01, 1 * SIZE(Y1)
	LFDU	y02, 1 * SIZE(Y1)

#ifdef PPCG4
	dcbt	AO2, PREA
#endif

	/* Column 1 contribution, rows 4-7; a5..a8 are refilled from column 2. */
	FMADD	y13, alpha1, a5, y05
	LFDU	a5,  1 * SIZE(AO2)
	FMADD	y14, alpha1, a6, y06
	LFDU	a6,  1 * SIZE(AO2)
	FMADD	y15, alpha1, a7, y07
	LFDU	a7,  1 * SIZE(AO2)
	FMADD	y16, alpha1, a8, y08
	LFDU	a8,  1 * SIZE(AO2)

	LFDU	y03, 1 * SIZE(Y1)
	LFDU	y04, 1 * SIZE(Y1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO2, PREA
#endif

	/* Column 2 contribution, rows 0-3; a1..a4 get the next group of column 1. */
	FMADD	y09, alpha2, a1, y09
	LFDU	a1,  1 * SIZE(AO1)
	FMADD	y10, alpha2, a2, y10
	LFDU	a2,  1 * SIZE(AO1)
	FMADD	y11, alpha2, a3, y11
	LFDU	a3,  1 * SIZE(AO1)
	FMADD	y12, alpha2, a4, y12
	LFDU	a4,  1 * SIZE(AO1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	Y1, PREA
#endif

	LFDU	y05, 1 * SIZE(Y1)
	LFDU	y06, 1 * SIZE(Y1)

#ifdef PPCG4
	dcbt	AO1, PREA
#endif

	/* Column 2 contribution, rows 4-7; a5..a8 get the next group of column 1. */
	FMADD	y13, alpha2, a5, y13
	LFDU	a5,  1 * SIZE(AO1)
	FMADD	y14, alpha2, a6, y14
	LFDU	a6,  1 * SIZE(AO1)
	FMADD	y15, alpha2, a7, y15
	LFDU	a7,  1 * SIZE(AO1)
	FMADD	y16, alpha2, a8, y16
	LFDU	a8,  1 * SIZE(AO1)

	LFDU	y07, 1 * SIZE(Y1)
	LFDU	y08, 1 * SIZE(Y1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif

	STFDU	y09, 1 * SIZE(Y2)
	STFDU	y10, 1 * SIZE(Y2)
	STFDU	y11, 1 * SIZE(Y2)
	STFDU	y12, 1 * SIZE(Y2)

	STFDU	y13, 1 * SIZE(Y2)
	STFDU	y14, 1 * SIZE(Y2)
	STFDU	y15, 1 * SIZE(Y2)
	STFDU	y16, 1 * SIZE(Y2)
	bdnz	LL(32)
	.align 4

/* Drain: last preloaded 8-row group, no further preloading of AO1/Y1. */
LL(33):
	FMADD	y01, alpha1, a1, y01
	LFDU	a1,  1 * SIZE(AO2)
	FMADD	y02, alpha1, a2, y02
	LFDU	a2,  1 * SIZE(AO2)
	FMADD	y03, alpha1, a3, y03
	LFDU	a3,  1 * SIZE(AO2)
	FMADD	y04, alpha1, a4, y04
	LFDU	a4,  1 * SIZE(AO2)

	FMADD	y05, alpha1, a5, y05
	LFDU	a5,  1 * SIZE(AO2)
	FMADD	y06, alpha1, a6, y06
	LFDU	a6,  1 * SIZE(AO2)
	FMADD	y07, alpha1, a7, y07
	LFDU	a7,  1 * SIZE(AO2)
	FMADD	y08, alpha1, a8, y08
	LFDU	a8,  1 * SIZE(AO2)

	FMADD	y01, alpha2, a1, y01
	FMADD	y02, alpha2, a2, y02
	FMADD	y03, alpha2, a3, y03
	FMADD	y04, alpha2, a4, y04

	FMADD	y05, alpha2, a5, y05
	STFDU	y01, 1 * SIZE(Y2)
	FMADD	y06, alpha2, a6, y06
	STFDU	y02, 1 * SIZE(Y2)
	FMADD	y07, alpha2, a7, y07
	STFDU	y03, 1 * SIZE(Y2)
	FMADD	y08, alpha2, a8, y08
	STFDU	y04, 1 * SIZE(Y2)

	STFDU	y05, 1 * SIZE(Y2)
	STFDU	y06, 1 * SIZE(Y2)
	STFDU	y07, 1 * SIZE(Y2)
	STFDU	y08, 1 * SIZE(Y2)
	.align 4

/* Two columns, row remainder: nothing left if (M & 7) == 0, else 4 rows. */
LL(35):
	andi.	r0, M, 7
	ble	LL(40)

	andi.	r0, M, 4
	ble	LL(37)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)
	LFDU	a3,  1 * SIZE(AO1)
	LFDU	y03, 1 * SIZE(Y1)
	LFDU	a4,  1 * SIZE(AO1)
	LFDU	y04, 1 * SIZE(Y1)

	FMADD	y01, alpha1, a1, y01
	LFDU	a5,  1 * SIZE(AO2)
	FMADD	y02, alpha1, a2, y02
	LFDU	a6,  1 * SIZE(AO2)
	FMADD	y03, alpha1, a3, y03
	LFDU	a7,  1 * SIZE(AO2)
	FMADD	y04, alpha1, a4, y04
	LFDU	a8,  1 * SIZE(AO2)

	FMADD	y01, alpha2, a5, y01
	FMADD	y02, alpha2, a6, y02
	FMADD	y03, alpha2, a7, y03
	FMADD	y04, alpha2, a8, y04

	STFDU	y01, 1 * SIZE(Y2)
	STFDU	y02, 1 * SIZE(Y2)
	STFDU	y03, 1 * SIZE(Y2)
	STFDU	y04, 1 * SIZE(Y2)
	.align 4

/* Two columns, 2-row remainder (M & 2). */
LL(37):
	andi.	r0, M, 2
	ble	LL(38)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)

	LFDU	a3,  1 * SIZE(AO2)
	LFDU	a4,  1 * SIZE(AO2)

	FMADD	y01, alpha1, a1, y01
	FMADD	y02, alpha1, a2, y02
	FMADD	y01, alpha2, a3, y01
	FMADD	y02, alpha2, a4, y02

	STFDU	y01, 1 * SIZE(Y2)
	STFDU	y02, 1 * SIZE(Y2)
	.align 4

/* Two columns, last single row (M & 1). */
LL(38):
	andi.	r0, M, 1
	ble	LL(40)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO2)

	FMADD	y01, alpha1, a1, y01
	FMADD	y01, alpha2, a2, y01

	STFDU	y01, 1 * SIZE(Y2)
	.align 4

/*
 * One-column remainder (N & 1).  `lfd` does not touch CR0, so the `ble`
 * still tests the result of the `andi.` two instructions earlier.
 */
LL(40):
	andi.	J, N, 1
	lfd	alpha, ALPHA
	ble	LL(990)

	LFDUX	alpha1, X, INCX

	mr	AO1, A
	add	A,   A, LDA

	FMUL	alpha1, alpha, alpha1
	mr	Y1, YY
	mr	Y2, YY

	/* CTR = M >> 3: number of 8-row groups; none -> remainders at LL(45). */
	srawi.	r0,  M, 3
	mtspr	CTR, r0
	ble	LL(45)

	/* Preload the first 8 y values and the first 8 column elements. */
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y03, 1 * SIZE(Y1)
	LFDU	a3,  1 * SIZE(AO1)
	LFDU	y04, 1 * SIZE(Y1)
	LFDU	a4,  1 * SIZE(AO1)

	LFDU	y05, 1 * SIZE(Y1)
	LFDU	a5,  1 * SIZE(AO1)
	LFDU	y06, 1 * SIZE(Y1)
	LFDU	a6,  1 * SIZE(AO1)
	LFDU	y07, 1 * SIZE(Y1)
	LFDU	a7,  1 * SIZE(AO1)
	LFDU	y08, 1 * SIZE(Y1)
	LFDU	a8,  1 * SIZE(AO1)

	/* Only one group: skip the pipelined loop and drain at LL(43). */
	bdz	LL(43)
	.align 4

/* Pipelined 8-row x 1-column loop; same preload scheme as LL(32). */
LL(42):
#ifdef PPCG4
	dcbtst	Y1, PREA
#endif

	FMADD	y09, alpha1, a1, y01
	LFDU	a1,  1 * SIZE(AO1)
	FMADD	y10, alpha1, a2, y02
	LFDU	a2,  1 * SIZE(AO1)
	FMADD	y11, alpha1, a3, y03
	LFDU	a3,  1 * SIZE(AO1)
	FMADD	y12, alpha1, a4, y04
	LFDU	a4,  1 * SIZE(AO1)

	LFDU	y01, 1 * SIZE(Y1)
	LFDU	y02, 1 * SIZE(Y1)
	LFDU	y03, 1 * SIZE(Y1)
	LFDU	y04, 1 * SIZE(Y1)

#ifdef PPCG4
	dcbt	AO1, PREA
#endif

	FMADD	y13, alpha1, a5, y05
	LFDU	a5,  1 * SIZE(AO1)
	FMADD	y14, alpha1, a6, y06
	LFDU	a6,  1 * SIZE(AO1)
	FMADD	y15, alpha1, a7, y07
	LFDU	a7,  1 * SIZE(AO1)
	FMADD	y16, alpha1, a8, y08
	LFDU	a8,  1 * SIZE(AO1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbtst	Y1, PREA
#endif

	LFDU	y05, 1 * SIZE(Y1)
	LFDU	y06, 1 * SIZE(Y1)
	LFDU	y07, 1 * SIZE(Y1)
	LFDU	y08, 1 * SIZE(Y1)

#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif

	STFDU	y09, 1 * SIZE(Y2)
	STFDU	y10, 1 * SIZE(Y2)
	STFDU	y11, 1 * SIZE(Y2)
	STFDU	y12, 1 * SIZE(Y2)

	STFDU	y13, 1 * SIZE(Y2)
	STFDU	y14, 1 * SIZE(Y2)
	STFDU	y15, 1 * SIZE(Y2)
	STFDU	y16, 1 * SIZE(Y2)
	bdnz	LL(42)
	.align 4

/* Drain: last preloaded 8-row group of the single column. */
LL(43):
	FMADD	y01, alpha1, a1, y01
	FMADD	y02, alpha1, a2, y02
	FMADD	y03, alpha1, a3, y03
	FMADD	y04, alpha1, a4, y04

	FMADD	y05, alpha1, a5, y05
	STFDU	y01, 1 * SIZE(Y2)
	FMADD	y06, alpha1, a6, y06
	STFDU	y02, 1 * SIZE(Y2)
	FMADD	y07, alpha1, a7, y07
	STFDU	y03, 1 * SIZE(Y2)
	FMADD	y08, alpha1, a8, y08
	STFDU	y04, 1 * SIZE(Y2)

	STFDU	y05, 1 * SIZE(Y2)
	STFDU	y06, 1 * SIZE(Y2)
	STFDU	y07, 1 * SIZE(Y2)
	STFDU	y08, 1 * SIZE(Y2)
	.align 4

/* One column, row remainder: nothing left if (M & 7) == 0, else 4 rows. */
LL(45):
	andi.	r0, M, 7
	ble	LL(990)

	andi.	r0, M, 4
	ble	LL(47)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)
	LFDU	a3,  1 * SIZE(AO1)
	LFDU	y03, 1 * SIZE(Y1)
	LFDU	a4,  1 * SIZE(AO1)
	LFDU	y04, 1 * SIZE(Y1)

	FMADD	y01, alpha1, a1, y01
	FMADD	y02, alpha1, a2, y02
	FMADD	y03, alpha1, a3, y03
	FMADD	y04, alpha1, a4, y04

	STFDU	y01, 1 * SIZE(Y2)
	STFDU	y02, 1 * SIZE(Y2)
	STFDU	y03, 1 * SIZE(Y2)
	STFDU	y04, 1 * SIZE(Y2)
	.align 4

/* One column, 2-row remainder (M & 2). */
LL(47):
	andi.	r0, M, 2
	ble	LL(48)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)
	LFDU	a2,  1 * SIZE(AO1)
	LFDU	y02, 1 * SIZE(Y1)

	FMADD	y01, alpha1, a1, y01
	FMADD	y02, alpha1, a2, y02

	STFDU	y01, 1 * SIZE(Y2)
	STFDU	y02, 1 * SIZE(Y2)
	.align 4

/* One column, last single row (M & 1). */
LL(48):
	andi.	r0, M, 1
	ble	LL(990)

	LFDU	a1,  1 * SIZE(AO1)
	LFDU	y01, 1 * SIZE(Y1)

	FMADD	y01, alpha1, a1, y01

	STFDU	y01, 1 * SIZE(Y2)
	.align 4

/*
 * Copy-back.  Skipped when INCY equals SIZE.  Otherwise the contiguous
 * results in BUFFER are added element-wise to the strided vector Y:
 * Y is the strided read cursor, Y1 (a copy of Y) the strided write
 * cursor, YY the contiguous buffer cursor (biased by -SIZE because LFDU
 * pre-increments).
 * NOTE(review): assumes Y was already biased by -INCY and that BUFFER
 * holds the accumulated product -- both are set up above this excerpt.
 */
LL(990):
	cmpi	cr0, 0, INCY, SIZE
	beq	LL(999)

	addi	YY, BUFFER, -SIZE
	mr	Y1, Y

	srawi.	r0, M, 3
	mtspr	CTR, r0
	ble	LL(995)
	.align 4

/* Copy-back, 8 elements per iteration. */
LL(991):
	LFDUX	f0, Y, INCY
	LFDUX	f1, Y, INCY
	LFDUX	f2, Y, INCY
	LFDUX	f3, Y, INCY
	LFDUX	f4, Y, INCY
	LFDUX	f5, Y, INCY
	LFDUX	f6, Y, INCY
	LFDUX	f7, Y, INCY

	LFDU	f8,   1 * SIZE(YY)
	LFDU	f9,   1 * SIZE(YY)
	LFDU	f10,  1 * SIZE(YY)
	LFDU	f11,  1 * SIZE(YY)
	LFDU	f12,  1 * SIZE(YY)
	LFDU	f13,  1 * SIZE(YY)
	LFDU	f14,  1 * SIZE(YY)
	LFDU	f15,  1 * SIZE(YY)

	FADD	f8,  f8,  f0
	FADD	f9,  f9,  f1
	FADD	f10, f10, f2
	FADD	f11, f11, f3
	FADD	f12, f12, f4
	FADD	f13, f13, f5
	FADD	f14, f14, f6
	FADD	f15, f15, f7

	STFDUX	f8,  Y1, INCY
	STFDUX	f9,  Y1, INCY
	STFDUX	f10, Y1, INCY
	STFDUX	f11, Y1, INCY
	STFDUX	f12, Y1, INCY
	STFDUX	f13, Y1, INCY
	STFDUX	f14, Y1, INCY
	STFDUX	f15, Y1, INCY
	bdnz	LL(991)
	.align 4

/* Copy-back remainder: 4 elements (M & 4). */
LL(995):
	andi.	J, M, 4
	ble	LL(996)

	LFDUX	f0, Y, INCY
	LFDUX	f1, Y, INCY
	LFDUX	f2, Y, INCY
	LFDUX	f3, Y, INCY

	LFDU	f8,   1 * SIZE(YY)
	LFDU	f9,   1 * SIZE(YY)
	LFDU	f10,  1 * SIZE(YY)
	LFDU	f11,  1 * SIZE(YY)

	FADD	f8,  f8,  f0
	FADD	f9,  f9,  f1
	FADD	f10, f10, f2
	FADD	f11, f11, f3

	STFDUX	f8,  Y1, INCY
	STFDUX	f9,  Y1, INCY
	STFDUX	f10, Y1, INCY
	STFDUX	f11, Y1, INCY
	.align 4

/* Copy-back remainder: 2 elements (M & 2). */
LL(996):
	andi.	J, M, 2
	ble	LL(997)

	LFDUX	f0, Y, INCY
	LFDUX	f1, Y, INCY

	LFDU	f8,   1 * SIZE(YY)
	LFDU	f9,   1 * SIZE(YY)

	FADD	f8,  f8,  f0
	FADD	f9,  f9,  f1

	STFDUX	f8,  Y1, INCY
	STFDUX	f9,  Y1, INCY
	.align 4

/* Copy-back remainder: last element (M & 1). */
LL(997):
	andi.	J, M, 1
	ble	LL(999)

	LFDUX	f0, Y, INCY
	LFDU	f8,   1 * SIZE(YY)
	FADD	f8,  f8,  f0
	STFDUX	f8,  Y1, INCY
	.align 4

/*
 * Epilogue: r3 = 0, reload f14-f27 and r14-r23 from the frame, release
 * STACKSIZE bytes of stack and return.
 * NOTE(review): the matching saves are presumably in the prologue above
 * this excerpt -- confirm the offsets against it.
 */
LL(999):
	li	r3, 0

	lfd	f14,     0(SP)
	lfd	f15,     8(SP)
	lfd	f16,    16(SP)
	lfd	f17,    24(SP)
	lfd	f18,    32(SP)
	lfd	f19,    40(SP)
	lfd	f20,    48(SP)
	lfd	f21,    56(SP)
	lfd	f22,    64(SP)
	lfd	f23,    72(SP)
	lfd	f24,    80(SP)
	lfd	f25,    88(SP)
	lfd	f26,    96(SP)
	lfd	f27,   104(SP)

#ifdef __64BIT__
	ld	r14,   144(SP)
	ld	r15,   152(SP)
	ld	r16,   160(SP)
	ld	r17,   168(SP)
	ld	r18,   176(SP)
	ld	r19,   184(SP)
	ld	r20,   192(SP)
	ld	r21,   200(SP)
	ld	r22,   208(SP)
	ld	r23,   216(SP)
#else
	lwz	r14,   144(SP)
	lwz	r15,   148(SP)
	lwz	r16,   152(SP)
	lwz	r17,   156(SP)
	lwz	r18,   160(SP)
	lwz	r19,   164(SP)
	lwz	r20,   168(SP)
	lwz	r21,   172(SP)
	lwz	r22,   176(SP)
	lwz	r23,   180(SP)
#endif

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE
	/* Closes a conditional opened above this excerpt. */
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -