⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zgemv_t_ppc440.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
/*
 * Visible tail of a routine whose entry point, register aliases and macros
 * (LL, SIZE, LFDU, LFDUX, STFDU, STFDUX, FMADD, FMADDR, FMSUBR, FADD,
 * FMADD1..FMADD4, M, N, J, A, LDA, AO1..AO4, X1, XP, Y, YY, INCY, y1..y8,
 * a1..a8, b1..b8, alpha_r, alpha_i, FZERO, PREA, SP, STACKSIZE, EPILOGUE)
 * are all defined outside this view.
 *
 * The listing had been collapsed onto a handful of physical lines, which left
 * preprocessor directives in the middle of lines and fused ".align 4" with
 * the following label.  The statements below are the same instructions in the
 * same order, restored to one statement per line.
 *
 * NOTE(review): the accumulator pattern (yN += aK * bK via FMADD1..4, then a
 * scale by alpha_r/alpha_i into Y) looks like a complex transposed
 * matrix-vector product -- confirm against the macro definitions and the
 * routine header.
 */

/* ---- four-column pass, remainder when bit 1 of M is set ------------------ */
/* Loads four values from each of AO1..AO4 and four from X1. */
LL(15):
	andi.	r0, M, 2
	ble	LL(17)		/* masked value is never negative, so this skips when the bit is clear */
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	a3, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)
	LFDU	a4, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	LFDU	a5, 1 * SIZE(AO3)
	FMADD2	y2,  a1,  b2, y2
	LFDU	a6, 1 * SIZE(AO3)
	FMADD1	y3,  a3,  b1, y3
	LFDU	a7, 1 * SIZE(AO4)
	FMADD2	y4,  a3,  b2, y4
	LFDU	a8, 1 * SIZE(AO4)
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y5,  a5,  b1, y5
	FMADD2	y6,  a5,  b2, y6
	FMADD1	y7,  a7,  b1, y7
	FMADD2	y8,  a7,  b2, y8
	FMADD3	y5,  a6,  b2, y5
	LFDU	a5, 1 * SIZE(AO3)
	FMADD4	y6,  a6,  b1, y6
	LFDU	a6, 1 * SIZE(AO3)
	FMADD3	y7,  a8,  b2, y7
	LFDU	a7, 1 * SIZE(AO4)
	FMADD4	y8,  a8,  b1, y8
	LFDU	a8, 1 * SIZE(AO4)
	FMADD1	y1,  a1,  b3, y1
	FMADD2	y2,  a1,  b4, y2
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
	FMADD3	y1,  a2,  b4, y1
	FMADD4	y2,  a2,  b3, y2
	FMADD3	y3,  a4,  b4, y3
	FMADD4	y4,  a4,  b3, y4
	FMADD1	y5,  a5,  b3, y5
	FMADD2	y6,  a5,  b4, y6
	FMADD1	y7,  a7,  b3, y7
	FMADD2	y8,  a7,  b4, y8
	FMADD3	y5,  a6,  b4, y5
	FMADD4	y6,  a6,  b3, y6
	FMADD3	y7,  a8,  b4, y7
	FMADD4	y8,  a8,  b3, y8
	.align 4

/* ---- four-column pass, remainder when bit 0 of M is set ------------------ */
/* Loads two values from each of AO1..AO4 and two from X1. */
LL(17):
	andi.	r0, M, 1
	ble	LL(19)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	a3, 1 * SIZE(AO2)
	LFDU	a4, 1 * SIZE(AO2)
	LFDU	a5, 1 * SIZE(AO3)
	LFDU	a6, 1 * SIZE(AO3)
	LFDU	a7, 1 * SIZE(AO4)
	LFDU	a8, 1 * SIZE(AO4)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	FMADD2	y2,  a1,  b2, y2
	FMADD1	y3,  a3,  b1, y3
	FMADD2	y4,  a3,  b2, y4
	FMADD3	y1,  a2,  b2, y1
	FMADD4	y2,  a2,  b1, y2
	FMADD3	y3,  a4,  b2, y3
	FMADD4	y4,  a4,  b1, y4
	FMADD1	y5,  a5,  b1, y5
	FMADD2	y6,  a5,  b2, y6
	FMADD1	y7,  a7,  b1, y7
	FMADD2	y8,  a7,  b2, y8
	FMADD3	y5,  a6,  b2, y5
	FMADD4	y6,  a6,  b1, y6
	FMADD3	y7,  a8,  b2, y7
	FMADD4	y8,  a8,  b1, y8
	.align 4

/* ---- four-column pass, write-back ---------------------------------------- */
/*
 * Reads four pairs through Y, combines each pair with the matching
 * accumulator pair (y1/y2 .. y7/y8) scaled by alpha_r and alpha_i, and
 * writes the pairs back through YY.  Each pair is accessed as an indexed
 * update by INCY followed by an update by 1 * SIZE.
 * NOTE(review): presumably INCY was pre-adjusted before this point to
 * account for the extra SIZE step -- confirm in the setup code.
 */
LL(19):
	LFDUX	b1,  Y, INCY
	LFDU	b2,  1 * SIZE(Y)
	LFDUX	b3,  Y, INCY
	LFDU	b4,  1 * SIZE(Y)
	LFDUX	b5,  Y, INCY
	LFDU	b6,  1 * SIZE(Y)
	LFDUX	b7,  Y, INCY
	LFDU	b8,  1 * SIZE(Y)
	FMADD	b1, alpha_r, y1, b1
	FMADDR	b2, alpha_r, y2, b2
	FMADD	b3, alpha_r, y3, b3
	FMADDR	b4, alpha_r, y4, b4
	FMADD	b5, alpha_r, y5, b5
	FMADDR	b6, alpha_r, y6, b6
	FMADD	b7, alpha_r, y7, b7
	FMADDR	b8, alpha_r, y8, b8
	FMSUBR	b1, alpha_i, y2, b1
	FMADD	b2, alpha_i, y1, b2
	FMSUBR	b3, alpha_i, y4, b3
	FMADD	b4, alpha_i, y3, b4
	FMSUBR	b5, alpha_i, y6, b5
	FMADD	b6, alpha_i, y5, b6
	FMSUBR	b7, alpha_i, y8, b7
	FMADD	b8, alpha_i, y7, b8
	STFDUX	b1,  YY, INCY
	STFDU	b2,  1 * SIZE(YY)
	STFDUX	b3,  YY, INCY
	STFDU	b4,  1 * SIZE(YY)
	STFDUX	b5,  YY, INCY
	STFDU	b6,  1 * SIZE(YY)
	STFDUX	b7,  YY, INCY
	STFDU	b8,  1 * SIZE(YY)
	addi	J, J, -1
	cmpwi	cr0, J, 0
	bgt	LL(11)		/* LL(11) is outside this view; loops while J > 0 */
	.align 4

/* ---- two-column pass, taken when bit 1 of N is set ----------------------- */
/*
 * Clears y1..y4 from FZERO, points AO1/AO2 at two consecutive LDA-spaced
 * positions starting at A, advances A past both, and restarts X1 from XP.
 */
LL(20):
	andi.	J, N, 2
	ble	LL(30)
	lfd	 y1,  FZERO
	mr     AO1, A
	fmr	 y2,  y1
	mr     X1, XP
	fmr	 y3,  y1
	add    AO2, A,   LDA
	fmr	 y4,  y1
	add    A,   AO2, LDA
	srawi.	r0,  M, 2	/* trip count = M >> 2; CR0 records its sign */
	mtspr	CTR, r0
	ble	LL(25)		/* no full group of four: go to the remainders */
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	a3, 1 * SIZE(AO2)
	bdz	LL(23)		/* exactly one group: skip the steady-state loop */
	.align 5

/* Steady-state loop: loads for the next step are interleaved with the */
/* multiply-adds of the current one; the last group is drained at LL(23). */
LL(22):
	FMADD1	y1,  a1,  b1, y1
	LFDU	a4, 1 * SIZE(AO2)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b3, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b1, y3
	LFDU	b4, 1 * SIZE(X1)
	FMADD2	y4,  a3,  b2, y4
#ifdef PPCG4
	dcbt	AO1, PREA
#endif
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
#ifdef PPCG4
	dcbt	AO2, PREA
#endif
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
#ifdef PPCG4
	dcbt	X1, PREA
#endif
	FMADD3	y1,  a2,  b4, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b3, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b4, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b3, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b1, y3
	FMADD2	y4,  a3,  b2, y4
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO2, PREA
#endif
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X1, PREA
#endif
	FMADD3	y1,  a2,  b4, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b3, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b4, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b3, y4
	bdnz	LL(22)
	.align 4

/* Drain: same arithmetic as one loop trip, without the loads for a next trip. */
LL(23):
	FMADD1	y1,  a1,  b1, y1
	LFDU	a4, 1 * SIZE(AO2)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b3, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b1, y3
	LFDU	b4, 1 * SIZE(X1)
	FMADD2	y4,  a3,  b2, y4
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
	FMADD3	y1,  a2,  b4, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b3, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b4, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b3, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
	FMADD1	y3,  a3,  b1, y3
	FMADD2	y4,  a3,  b2, y4
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y1,  a1,  b3, y1
	FMADD2	y2,  a1,  b4, y2
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
	FMADD3	y1,  a2,  b4, y1
	FMADD4	y2,  a2,  b3, y2
	FMADD3	y3,  a4,  b4, y3
	FMADD4	y4,  a4,  b3, y4
	.align 4

/* Two-column pass, remainder when bit 1 of M is set. */
LL(25):
	andi.	r0, M, 2
	ble	LL(27)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	LFDU	a3, 1 * SIZE(AO2)
	LFDU	b3, 1 * SIZE(X1)
	LFDU	a4, 1 * SIZE(AO2)
	LFDU	b4, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	FMADD2	y2,  a1,  b2, y2
	FMADD1	y3,  a3,  b1, y3
	FMADD2	y4,  a3,  b2, y4
	FMADD3	y1,  a2,  b2, y1
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y2,  a2,  b1, y2
	LFDU	a2, 1 * SIZE(AO1)
	FMADD3	y3,  a4,  b2, y3
	LFDU	a3, 1 * SIZE(AO2)
	FMADD4	y4,  a4,  b1, y4
	LFDU	a4, 1 * SIZE(AO2)
	FMADD1	y1,  a1,  b3, y1
	FMADD2	y2,  a1,  b4, y2
	FMADD1	y3,  a3,  b3, y3
	FMADD2	y4,  a3,  b4, y4
	FMADD3	y1,  a2,  b4, y1
	FMADD4	y2,  a2,  b3, y2
	FMADD3	y3,  a4,  b4, y3
	FMADD4	y4,  a4,  b3, y4
	.align 4

/* Two-column pass, remainder when bit 0 of M is set. */
LL(27):
	andi.	r0, M, 1
	ble	LL(29)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	a3, 1 * SIZE(AO2)
	LFDU	a4, 1 * SIZE(AO2)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	FMADD2	y2,  a1,  b2, y2
	FMADD1	y3,  a3,  b1, y3
	FMADD2	y4,  a3,  b2, y4
	FMADD3	y1,  a2,  b2, y1
	FMADD4	y2,  a2,  b1, y2
	FMADD3	y3,  a4,  b2, y3
	FMADD4	y4,  a4,  b1, y4
	.align 4

/* Two-column pass, write-back: same scheme as LL(19) for two pairs. */
LL(29):
	LFDUX	b1,  Y, INCY
	LFDU	b2,  1 * SIZE(Y)
	LFDUX	b3,  Y, INCY
	LFDU	b4,  1 * SIZE(Y)
	FMADD	b1, alpha_r, y1, b1
	FMADDR	b2, alpha_r, y2, b2
	FMADD	b3, alpha_r, y3, b3
	FMADDR	b4, alpha_r, y4, b4
	FMSUBR	b1, alpha_i, y2, b1
	FMADD	b2, alpha_i, y1, b2
	FMSUBR	b3, alpha_i, y4, b3
	FMADD	b4, alpha_i, y3, b4
	STFDUX	b1,  YY, INCY
	STFDU	b2,  1 * SIZE(YY)
	STFDUX	b3,  YY, INCY
	STFDU	b4,  1 * SIZE(YY)
	.align 4

/* ---- single-column pass, taken when bit 0 of N is set -------------------- */
/*
 * Only AO1 is used here.  y1/y2 collect the FMADD1/FMADD2 terms and y3/y4
 * collect the FMADD3/FMADD4 terms separately; the two halves are summed at
 * LL(39) before the write-back.
 */
LL(30):
	andi.	J, N, 1
	ble	LL(999)
	lfd	 y1,  FZERO
	mr     AO1, A
	fmr	 y2,  y1
	mr     X1, XP
	fmr	 y3,  y1
	fmr	 y4,  y1
	add    A,   A, LDA
	srawi.	r0,  M, 2	/* trip count = M >> 2; CR0 records its sign */
	mtspr	CTR, r0
	ble	LL(35)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	bdz	LL(33)		/* exactly one group: skip the steady-state loop */
	.align 5

/* Steady-state loop for the single column; drained at LL(33). */
LL(32):
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
#ifdef PPCG4
	dcbt	AO1, PREA
#endif
	FMADD3	y3,  a2,  b2, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b1, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
#ifdef PPCG4
	dcbt	X1, PREA
#endif
	FMADD3	y3,  a2,  b4, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b3, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	AO1, PREA
#endif
	FMADD3	y3,  a2,  b2, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b1, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
#if defined(PPCG4) && defined(DOUBLE)
	dcbt	X1, PREA
#endif
	FMADD3	y3,  a2,  b4, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b3, y4
	LFDU	a2, 1 * SIZE(AO1)
	bdnz	LL(32)
	.align 4

/* Drain: same arithmetic as one loop trip, without the loads for a next trip. */
LL(33):
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
	FMADD3	y3,  a2,  b2, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b1, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b3, y1
	LFDU	b1, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b4, y2
	LFDU	b2, 1 * SIZE(X1)
	FMADD3	y3,  a2,  b4, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b3, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	b4, 1 * SIZE(X1)
	FMADD3	y3,  a2,  b2, y3
	LFDU	a1, 1 * SIZE(AO1)
	FMADD4	y4,  a2,  b1, y4
	LFDU	a2, 1 * SIZE(AO1)
	FMADD1	y1,  a1,  b3, y1
	FMADD2	y2,  a1,  b4, y2
	FMADD3	y3,  a2,  b4, y3
	FMADD4	y4,  a2,  b3, y4
	.align 4

/* Single-column pass, remainder when bit 1 of M is set. */
LL(35):
	andi.	r0, M, 2
	ble	LL(37)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	LFDU	b3, 1 * SIZE(X1)
	FMADD2	y2,  a1,  b2, y2
	LFDU	a3, 1 * SIZE(AO1)
	FMADD3	y3,  a2,  b2, y3
	LFDU	b4, 1 * SIZE(X1)
	FMADD4	y4,  a2,  b1, y4
	LFDU	a4, 1 * SIZE(AO1)
	FMADD1	y1,  a3,  b3, y1
	FMADD2	y2,  a3,  b4, y2
	FMADD3	y3,  a4,  b4, y3
	FMADD4	y4,  a4,  b3, y4
	.align 4

/* Single-column pass, remainder when bit 0 of M is set. */
LL(37):
	andi.	r0, M, 1
	ble	LL(39)
	LFDU	a1, 1 * SIZE(AO1)
	LFDU	b1, 1 * SIZE(X1)
	LFDU	a2, 1 * SIZE(AO1)
	LFDU	b2, 1 * SIZE(X1)
	FMADD1	y1,  a1,  b1, y1
	FMADD2	y2,  a1,  b2, y2
	FMADD3	y3,  a2,  b2, y3
	FMADD4	y4,  a2,  b1, y4
	.align 4

/* Single-column pass, write-back: fold y3/y4 into y1/y2, then one pair as in LL(19). */
LL(39):
	LFDUX	b1,  Y, INCY
	LFDU	b2,  1 * SIZE(Y)
	FADD	y1, y1, y3
	FADD	y2, y2, y4
	FMADD	b1, alpha_r, y1, b1
	FMADDR	b2, alpha_r, y2, b2
	FMSUBR	b1, alpha_i, y2, b1
	FMADD	b2, alpha_i, y1, b2
	STFDUX	b1,  YY, INCY
	STFDU	b2,  1 * SIZE(YY)
	.align 4

/* ---- common exit ---------------------------------------------------------- */
/*
 * Sets r3 to 0, reloads f14..f31 and r14..r22 from the frame at SP, releases
 * STACKSIZE bytes and returns.  The matching saves are outside this view.
 */
LL(999):
	li	r3, 0
	lfd	f14,     0(SP)
	lfd	f15,     8(SP)
	lfd	f16,    16(SP)
	lfd	f17,    24(SP)
	lfd	f18,    32(SP)
	lfd	f19,    40(SP)
	lfd	f20,    48(SP)
	lfd	f21,    56(SP)
	lfd	f22,    64(SP)
	lfd	f23,    72(SP)
	lfd	f24,    80(SP)
	lfd	f25,    88(SP)
	lfd	f26,    96(SP)
	lfd	f27,   104(SP)
	lfd	f28,   112(SP)
	lfd	f29,   120(SP)
	lfd	f30,   128(SP)
	lfd	f31,   136(SP)
#ifdef __64BIT__
	ld	r14,   144(SP)
	ld	r15,   152(SP)
	ld	r16,   160(SP)
	ld	r17,   168(SP)
	ld	r18,   176(SP)
	ld	r19,   184(SP)
	ld	r20,   192(SP)
	ld	r21,   200(SP)
	ld	r22,   208(SP)
#else
	lwz	r14,   144(SP)
	lwz	r15,   148(SP)
	lwz	r16,   152(SP)
	lwz	r17,   156(SP)
	lwz	r18,   160(SP)
	lwz	r19,   164(SP)
	lwz	r20,   168(SP)
	lwz	r21,   172(SP)
	lwz	r22,   176(SP)
#endif
	addi	SP, SP, STACKSIZE
	blr
	EPILOGUE
/* Closes a conditional that was opened outside this view. */
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -