⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_hummer_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	STFSDUX	ys1, YS, INCY2	b	.L999	.align 4.L70:	sub	A, A, INC2	sub	Y, Y, INCY	srawi.	J, N, 2	ble	.L80	.align 4.L71:	LFDUX	alpha1, X, INCX	mr	A1, A	add	A2, A,  LDA	add	A3, A2, LDA	LFSDUX	alpha1, X, INCX	LFDUX	alpha2, X, INCX	add	A4, A3, LDA	add	A,  A4, LDA	mr	YL, Y	LFSDUX	alpha2, X, INCX	fpmul	alpha1, alpha, alpha1	mr	YS, Y	srawi.	r0,  M, 3	mtspr	CTR, r0	fpmul	alpha2, alpha, alpha2	ble	.L75	LFDUX	yl1, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	LFSDUX	yl1, YL, INCY	LFDUX	yl2, YL, INCY	LFPDUX	a2,  A2, INC2	LFPDUX	a6,  A2, INC2	LFPDUX	a10, A2, INC2	LFPDUX	a14, A2, INC2	LFSDUX	yl2, YL, INCY	LFDUX	yl3, YL, INCY	LFPDUX	a3,  A3, INC2	LFPDUX	a7,  A3, INC2	LFPDUX	a11, A3, INC2	LFPDUX	a15, A3, INC2	LFSDUX	yl3, YL, INCY	LFDUX	yl4, YL, INCY	LFPDUX	a4,  A4, INC2	LFPDUX	a8,  A4, INC2	LFPDUX	a12, A4, INC2	LFPDUX	a16, A4, INC2	LFSDUX	yl4, YL, INCY	bdz	.L73	.align 4.L72:	fxcpmadd  ys1, alpha1, a1,  yl1	LFPDUX	a1,  A1, INC2	LFDUX	yl1, YL, INCY	fxcpmadd  ys2, alpha1, a5,  yl2	LFPDUX	a5,  A1, INC2	fxcpmadd  ys3, alpha1, a9,  yl3	LFPDUX	a9,  A1, INC2	fxcpmadd  ys4, alpha1, a13, yl4	LFPDUX	a13, A1, INC2	LFSDUX	yl1, YL, INCY	fxcsmadd  ys1, alpha1, a2,  ys1	LFPDUX	a2,  A2, INC2	LFDUX	yl2, YL, INCY	fxcsmadd  ys2, alpha1, a6,  ys2	LFPDUX	a6,  A2, INC2	fxcsmadd  ys3, alpha1, a10, ys3	LFPDUX	a10, A2, INC2	fxcsmadd  ys4, alpha1, a14, ys4	LFPDUX	a14, A2, INC2	LFSDUX	yl2, YL, INCY	fxcpmadd  ys1, alpha2, a3,  ys1	LFPDUX	a3,  A3, INC2	LFDUX	yl3, YL, INCY	fxcpmadd  ys2, alpha2, a7,  ys2	LFPDUX	a7,  A3, INC2	fxcpmadd  ys3, alpha2, a11, ys3	LFPDUX	a11, A3, INC2	fxcpmadd  ys4, alpha2, a15, ys4	LFPDUX	a15, A3, INC2	LFSDUX	yl3, YL, INCY	fxcsmadd  ys1, alpha2, a4,  ys1	LFPDUX	a4,  A4, INC2	LFDUX	yl4, YL, INCY	fxcsmadd  ys2, alpha2, a8,  ys2	LFPDUX	a8,  A4, INC2	fxcsmadd  ys3, alpha2, a12, ys3	LFPDUX	a12, A4, INC2	fxcsmadd  ys4, alpha2, a16, ys4	LFPDUX	a16, A4, INC2	LFSDUX	yl4, YL, INCY	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	bdnz	.L72	.align 4.L73:	fxcpmadd  ys1, alpha1, a1,  yl1	fxcpmadd  ys2, alpha1, a5,  yl2	fxcpmadd  ys3, alpha1, a9,  yl3	fxcpmadd  ys4, alpha1, a13, yl4	fxcsmadd  ys1, alpha1, a2,  ys1	fxcsmadd  ys2, alpha1, a6,  ys2	fxcsmadd  ys3, alpha1, a10, ys3	fxcsmadd  ys4, alpha1, a14, ys4	fxcpmadd  ys1, alpha2, a3,  ys1	fxcpmadd  ys2, alpha2, a7,  ys2	fxcpmadd  ys3, alpha2, a11, ys3	fxcpmadd  ys4, alpha2, a15, ys4	fxcsmadd  ys1, alpha2, a4,  ys1	fxcsmadd  ys2, alpha2, a8,  ys2	fxcsmadd  ys3, alpha2, a12, ys3	fxcsmadd  ys4, alpha2, a16, ys4	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	.align 4.L75:	andi.	r0, M, 7	ble	.L79	andi.	r0, M, 4	ble	.L77	LFDUX	yl1, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFSDUX	yl1, YL, INCY	LFPDUX	a2,  A2, INC2	LFPDUX	a6,  A2, INC2	LFDUX	yl2, YL, INCY	LFPDUX	a3,  A3, INC2	LFPDUX	a7,  A3, INC2	LFSDUX	yl2, YL, INCY	LFPDUX	a4,  A4, INC2	LFPDUX	a8,  A4, INC2	fxcpmadd  ys1, alpha1, a1, yl1	fxcpmadd  ys2, alpha1, a5, yl2	fxcsmadd  ys1, alpha1, a2, ys1	fxcsmadd  ys2, alpha1, a6, ys2	fxcpmadd  ys1, alpha2, a3, ys1	fxcpmadd  ys2, alpha2, a7, ys2	fxcsmadd  ys1, alpha2, a4, ys1	fxcsmadd  ys2, alpha2, a8, ys2	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	.align 4.L77:	andi.	r0, M, 2	ble	.L78	LFDUX	yl1, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	LFSDUX	yl1, YL, INCY	LFPDUX	a3,  A3, INC2	LFPDUX	a4,  A4, INC2	fxcpmadd  ys1, alpha1, a1, yl1	fxcsmadd  ys1, alpha1, a2, ys1	fxcpmadd  ys1, alpha2, a3, ys1	fxcsmadd  ys1, alpha2, a4, ys1	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	.align 4.L78:	andi.	r0, M, 1	ble	.L79	LFDUX	yl1, YL, INCY	LFDUX	a1,  A1, INC2	LFDUX	a2,  A2, INC2	LFDUX	a3,  A3, INC2	LFDUX	a4,  A4, INC2	fxcpmadd  ys1, alpha1, a1, yl1	fxcsmadd  ys1, alpha1, a2, ys1	fxcpmadd  ys1, alpha2, a3, ys1	fxcsmadd  ys1, alpha2, a4, ys1	STFDUX	ys1, YS, INCY	.align 4.L79:	addi	J, J, -1	cmpi	cr0, 0, J, 0	bgt	.L71	.align 4	.L80:	andi.	J, N, 2	ble	.L90	LFDUX	alpha1, X, INCX	mr	A1, A	add	A2, A,  LDA	add	A,  A2, LDA	LFSDUX	alpha1, X, INCX	mr	YL, Y	mr	YS, Y	fpmul	alpha1, alpha, alpha1	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	.L85	LFDUX	yl1, YL, INCY	LFDUX	a9,  YL, INCY	LFDUX	yl2, YL, INCY	LFDUX	a10, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a3,  A1, INC2	LFPDUX	a7,  A1, INC2	LFDUX	yl3, YL, INCY	LFDUX	a11, YL, INCY	LFDUX	yl4, YL, INCY	LFDUX	a12, YL, INCY	LFPDUX	a2,  A2, INC2	LFPDUX	a6,  A2, INC2	LFPDUX	a4,  A2, INC2	LFPDUX	a8,  A2, INC2	bdz	.L83	.align 4.L82:	fsmfp	yl1, a9	fsmfp	yl2, a10	fsmfp	yl3, a11	fsmfp	yl4, a12	fxcpmadd  ys1, alpha1, a1,  yl1	LFDUX	yl1, YL, INCY	LFDUX	a9,  YL, INCY	LFPDUX	a1,  A1, INC2	fxcpmadd  ys2, alpha1, a5,  yl2	LFDUX	yl2, YL, INCY	LFDUX	a10, YL, INCY	LFPDUX	a5,  A1, INC2	fxcpmadd  ys3, alpha1, a3,  yl3	LFDUX	yl3, YL, INCY	LFDUX	a11, YL, INCY	LFPDUX	a3,  A1, INC2	fxcpmadd  ys4, alpha1, a7,  yl4	LFDUX	yl4, YL, INCY	LFDUX	a12, YL, INCY	LFPDUX	a7,  A1, INC2	fxcsmadd  ys1, alpha1, a2,  ys1	LFPDUX	a2,  A2, INC2	fxcsmadd  ys2, alpha1, a6,  ys2	LFPDUX	a6,  A2, INC2	fxcsmadd  ys3, alpha1, a4,  ys3	LFPDUX	a4,  A2, INC2	fxcsmadd  ys4, alpha1, a8,  ys4	LFPDUX	a8,  A2, INC2	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	bdnz	.L82	.align 4.L83:	fsmfp	yl1, a9	fsmfp	yl2, a10	fsmfp	yl3, a11	fsmfp	yl4, a12	fxcpmadd  ys1, alpha1, a1,  yl1	fxcpmadd  ys2, alpha1, a5,  yl2	fxcpmadd  ys3, alpha1, a3,  yl3	fxcpmadd  ys4, alpha1, a7,  yl4	fxcsmadd  ys1, alpha1, a2,  ys1	fxcsmadd  ys2, alpha1, a6,  ys2	fxcsmadd  ys3, alpha1, a4,  ys3	fxcsmadd  ys4, alpha1, a8,  ys4	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	.align 4.L85:	andi.	r0, M, 7	ble	.L90	andi.	r0, M, 4	ble	.L87	LFDUX	yl1, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	LFSDUX	yl1, YL, INCY	LFDUX	yl2, YL, INCY	LFPDUX	a5,  A1, INC2	LFPDUX	a6,  A2, INC2	LFSDUX	yl2, YL, INCY	fxcpmadd  ys1, alpha1, a1, yl1	fxcpmadd  ys2, alpha1, a5, yl2	fxcsmadd  ys1, alpha1, a2, ys1	fxcsmadd  ys2, alpha1, a6, ys2	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	.align 4.L87:	andi.	r0, M, 2	ble	.L88	LFDUX	yl1, YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	LFSDUX	yl1, YL, INCY	fxcpmadd  ys1, alpha1, a1, yl1	fxcsmadd  ys1, alpha1, a2, ys1	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	.align 4.L88:	andi.	r0, M, 1	ble	.L90	LFDUX	yl1, YL, INCY	LFDUX	a1,  A1, INC2	LFDUX	a2,  A2, INC2	fxcpmadd  ys1, alpha1, a1, yl1	fxcsmadd  ys1, alpha1, a2, ys1	STFDUX	ys1, YS, INCY	.align 4.L90:	andi.	J, N, 1	ble	.L999	LFDUX	alpha1, X, INCX	mr	A1, A	mr	YL, Y	mr	YS, Y	fmul	alpha1, alpha, alpha1	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	.L95	LFDUX	yl1, YL, INCY	LFSDUX	a2,  YL, INCY	LFDUX	yl2, YL, INCY	LFSDUX	a4,  YL, INCY	LFDUX	yl3, YL, INCY	LFSDUX	a6,  YL, INCY	LFDUX	yl4, YL, INCY	LFSDUX	a8,  YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	bdz	.L93	.align 4.L92:	fmr	a2, yl1	fmr	a4, yl2	fmr	a6, yl3	fmr	a8, yl4	fxcpmadd  ys1, alpha1, a1,  a2	LFDUX	yl1, YL, INCY	LFSDUX	a2,  YL, INCY	fxcpmadd  ys2, alpha1, a5,  a4	LFDUX	yl2, YL, INCY	LFSDUX	a4,  YL, INCY	fxcpmadd  ys3, alpha1, a9,  a6	LFDUX	yl3, YL, INCY	LFSDUX	a6,  YL, INCY	fxcpmadd  ys4, alpha1, a13, a8	LFDUX	yl4, YL, INCY	LFSDUX	a8,  YL, INCY	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	bdnz	.L92	.align 4.L93:	fmr	a2, yl1	fmr	a4, yl2	fmr	a6, yl3	fmr	a8, yl4	fxcpmadd  ys1, alpha1, a1,  a2	fxcpmadd  ys2, alpha1, a5,  a4	fxcpmadd  ys3, alpha1, a9,  a6	fxcpmadd  ys4, alpha1, a13, a8	STFDUX	ys1, YS, INCY	STFSDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFSDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFSDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	STFSDUX	ys4, YS, INCY	.align 4.L95:	andi.	r0, M, 7	ble	.L999	andi.	r0, M, 4	ble	.L97	LFPDUX	a1,  A1, INC2	LFDUX	yl1, YL, INCY	LFDUX	yl2, YL, INCY	LFPDUX	a2,  A1, INC2	LFDUX	yl3, YL, INCY	LFDUX	yl4, YL, INCY	fxcpmadd  ys1, a1, alpha1, yl1	fxcsmadd  ys2, a1, alpha1, yl2	fxcpmadd  ys3, a2, alpha1, yl3	fxcsmadd  ys4, a2, alpha1, yl4	STFDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	STFDUX	ys3, YS, INCY	STFDUX	ys4, YS, INCY	.align 4.L97:	andi.	r0, M, 2	ble	.L98	LFPDUX	a1,  A1, INC2	LFDUX	yl1, YL, INCY	LFDUX	yl2, YL, INCY	fxcpmadd  ys1, a1, alpha1, yl1	fxcsmadd  ys2, a1, alpha1, yl2	STFDUX	ys1, YS, INCY	STFDUX	ys2, YS, INCY	.align 4.L98:	andi.	r0, M, 1	ble	.L999	LFDUX	yl1, YL, INCY	LFDUX	a1,  A1, INC2	fxcpmadd  ys1, alpha1, a1, yl1	STFDUX	ys1, YS, INCY	b	.L999	.align 4.L999:	addi	SP, SP, -4	lwzu	r16,   4(SP)	lwzu	r17,   4(SP)	lwzu	r18,   4(SP)	lwzu	r19,   4(SP)	lwzu	r20,   4(SP)	lwzu	r21,   4(SP)	lwzu	r22,   4(SP)	lwzu	r23,   4(SP)	lwzu	r24,   4(SP)	lwzu	r25,   4(SP)	lwzu	r26,   4(SP)	lwzu	r27,   4(SP)	lwzu	r28,   4(SP)	lwzu	r29,   4(SP)	lwzu	r30,   4(SP)	lwzu	r31,   4(SP)	subi	SP, SP, 12	li	r0, 16	lfpdux	f31, SP, r0	lfpdux	f30, SP, r0	lfpdux	f29, SP, r0	lfpdux	f28, SP, r0	lfpdux	f27, SP, r0	lfpdux	f26, SP, r0	lfpdux	f25, SP, r0	lfpdux	f24, SP, r0	lfpdux	f23, SP, r0	lfpdux	f22, SP, r0	lfpdux	f21, SP, r0	lfpdux	f20, SP, r0	lfpdux	f19, SP, r0	lfpdux	f18, SP, r0	lfpdux	f17, SP, r0	lfpdux	f16, SP, r0	lfpdux	f15, SP, r0	lfpdux	f14, SP, r0	addi	SP, SP, 16	blr	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -