⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_hummer_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	andi.	r0, M, 1	ble	.L999	LFDUX	yl1, YL, INCY2	LFDUX	a1,  A1, INC2	fxcpmadd  ys1, alpha1, a1, yl1	STFDUX	ys1, YS, INCY2	b	.L999	.align 4.L40:# A : aligned  LDA : even  Y : Unaligned	sub	A, A, INC2	sub	Y, Y, INCY	srawi.	J, N, 2	ble	.L50	.align 4.L41:	LFDUX	alpha1, X, INCX	LFSDUX	alpha1, X, INCX	LFDUX	alpha2, X, INCX	LFSDUX	alpha2, X, INCX	fpmul	alpha1, alpha, alpha1	fpmul	alpha2, alpha, alpha2	mr	A1, A	add	A2, A,  LDA	add	A3, A2, LDA	add	A4, A3, LDA	add	A,  A4, LDA	mr	YL, Y	sub	YS, Y, INCY2	LFSDX	ys1, YS, INCY2	LFDX	yl1, YL, INCY	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	.L45	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2	LFXDUX	yl4, YL, INCY2	LFXDUX	yl5, YL, INCY2	LFPDUX	a2,  A2, INC2	LFPDUX	a6,  A2, INC2	LFPDUX	a10, A2, INC2	LFPDUX	a14, A2, INC2	LFPDUX	a3,  A3, INC2	LFPDUX	a7,  A3, INC2	LFPDUX	a11, A3, INC2	LFPDUX	a15, A3, INC2	LFPDUX	a4,  A4, INC2	fsmr	yl1, yl2	LFPDUX	a8,  A4, INC2	fsmr	yl2, yl3	LFPDUX	a12, A4, INC2	fsmr	yl3, yl4	LFPDUX	a16, A4, INC2	fsmr	yl4, yl5	bdz	.L43	.align 4.L42:	fxcpmadd  ys2, alpha1, a1,  yl1	LFPDUX	a1,  A1, INC2	fxcpmadd  ys3, alpha1, a5,  yl2	LFPDUX	a5,  A1, INC2	fxcpmadd  ys4, alpha1, a9,  yl3	LFPDUX	a9,  A1, INC2	fxcpmadd  ys5, alpha1, a13, yl4	LFPDUX	a13, A1, INC2	fxcsmadd  ys2, alpha1, a2,  ys2	LFPDUX	a2,  A2, INC2	fxcsmadd  ys3, alpha1, a6,  ys3	LFPDUX	a6,  A2, INC2	fxcsmadd  ys4, alpha1, a10, ys4	LFPDUX	a10, A2, INC2	fxcsmadd  ys5, alpha1, a14, ys5	LFPDUX	a14, A2, INC2	fxcpmadd  ys2, alpha2, a3,  ys2	LFPDUX	a3,  A3, INC2	fxcpmadd  ys3, alpha2, a7,  ys3	LFPDUX	a7,  A3, INC2	fxcpmadd  ys4, alpha2, a11, ys4	LFPDUX	a11, A3, INC2	fxcpmadd  ys5, alpha2, a15, ys5	LFPDUX	a15, A3, INC2	fxcsmadd  ys2, alpha2, a4,  ys2	LFPDUX	a4,  A4, INC2	fxcsmadd  ys3, alpha2, a8,  ys3	LFPDUX	a8,  A4, INC2	fxcsmadd  ys4, alpha2, a12, ys4	LFPDUX	a12, A4, INC2	fxcsmadd  ys5, alpha2, a16, ys5	LFPDUX	a16, A4, INC2	fmr	yl1, yl5	LFXDUX	yl2, YL, INCY2	fmr	ys1, ys2	LFXDUX	yl3, YL, INCY2	fmr	ys2, ys3	LFXDUX	yl4, YL, INCY2	fmr	ys3, ys4	LFXDUX	yl5, YL, INCY2	fmr	ys4, ys5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	fsmr	yl1, yl2	STFXDUX	ys3, YS, INCY2	fsmr	yl2, yl3	STFXDUX	ys4, YS, INCY2	fsmr	yl3, yl4	fsmr	yl4, yl5	bdnz	.L42	.align 4.L43:	fxcpmadd  ys2, alpha1, a1,  yl1	fxcpmadd  ys3, alpha1, a5,  yl2	fxcpmadd  ys4, alpha1, a9,  yl3	fxcpmadd  ys5, alpha1, a13, yl4	fxcsmadd  ys2, alpha1, a2,  ys2	fxcsmadd  ys3, alpha1, a6,  ys3	fxcsmadd  ys4, alpha1, a10, ys4	fxcsmadd  ys5, alpha1, a14, ys5	fxcpmadd  ys2, alpha2, a3,  ys2	fxcpmadd  ys3, alpha2, a7,  ys3	fxcpmadd  ys4, alpha2, a11, ys4	fxcpmadd  ys5, alpha2, a15, ys5	fxcsmadd  ys2, alpha2, a4,  ys2	fxcsmadd  ys3, alpha2, a8,  ys3	fxcsmadd  ys4, alpha2, a12, ys4	fxcsmadd  ys5, alpha2, a16, ys5	fmr	ys1, ys2	fmr	ys2, ys3	fmr	ys3, ys4	fmr	ys4, ys5	fmr	yl1, yl5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	STFXDUX	ys3, YS, INCY2	STFXDUX	ys4, YS, INCY2	.align 4.L45:	andi.	r0, M, 7	ble	.L48	andi.	r0, M, 4	ble	.L46	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a2,  A2, INC2	LFPDUX	a6,  A2, INC2	LFPDUX	a3,  A3, INC2	LFPDUX	a7,  A3, INC2	LFPDUX	a4,  A4, INC2	fsmr	yl1, yl2	LFPDUX	a8,  A4, INC2	fsmr	yl2, yl3	fxcpmadd  ys2, alpha1, a1, yl1	fxcpmadd  ys3, alpha1, a5, yl2	fxcsmadd  ys2, alpha1, a2, ys2	fxcsmadd  ys3, alpha1, a6, ys3	fxcpmadd  ys2, alpha2, a3, ys2	fxcpmadd  ys3, alpha2, a7, ys3	fxcsmadd  ys2, alpha2, a4, ys2	fxcsmadd  ys3, alpha2, a8, ys3	fmr	yl1, yl3	fmr	ys1, ys2	fmr	ys2, ys3	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys3	STFXDUX	ys2, YS, INCY2	.align 4.L46:	andi.	r0, M, 2	ble	.L47	LFXDUX	yl2, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	LFPDUX	a3,  A3, INC2	LFPDUX	a4,  A4, INC2	fsmr	yl1, yl2	fxcpmadd  ys2, alpha1, a1, yl1	fxcsmadd  ys2, alpha1, a2, ys2	fxcpmadd  ys2, alpha2, a3, ys2	fxcsmadd  ys2, alpha2, a4, ys2	fmr	yl1, yl2	fmr	ys1, ys2	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys2	.align 4.L47:	andi.	r0, M, 1	ble	.L48	LFDUX	a1,  A1, INC2	LFDUX	a2,  A2, INC2	LFDUX	a3,  A3, INC2	LFDUX	a4,  A4, INC2	fxcpmadd  ys2, alpha1, a1, yl1	fxcsmadd  ys2, alpha1, a2, ys2	fxcpmadd  ys2, alpha2, a3, ys2	fxcsmadd  ys2, alpha2, a4, ys2	STFSDX	ys1, YS, INCY2	add	YS, YS, INCY	STFDX	ys2, YS, INCY2	b	.L49	.align 4.L48:	STFSDUX	ys1, YS, INCY2	.align 4.L49:	addi	J, J, -1	cmpi	cr0, 0, J, 0	bgt	.L41	.align 4	.L50:	andi.	J, N, 2	ble	.L60	LFDUX	alpha1, X, INCX	mr	A1, A	add	A2, A,  LDA	add	A,  A2, LDA	LFSDUX	alpha1, X, INCX	mr	YL, Y	sub	YS, Y, INCY2	fpmul	alpha1, alpha, alpha1	LFSDX	ys1, YS, INCY2	LFDX	yl1, YL, INCY	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	.L55	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2 	LFXDUX	yl4, YL, INCY2	LFXDUX	yl5, YL, INCY2	LFPDUX	a2,  A2, INC2	fsmr	yl1, yl2	LFPDUX	a6,  A2, INC2	fsmr	yl2, yl3	LFPDUX	a10, A2, INC2	fsmr	yl3, yl4	LFPDUX	a14, A2, INC2	fsmr	yl4, yl5	bdz	.L53	.align 4.L52:	fxcpmadd  ys2, alpha1, a1,  yl1	LFPDUX	a1,  A1, INC2	fxcpmadd  ys3, alpha1, a5,  yl2	LFPDUX	a5,  A1, INC2	fxcpmadd  ys4, alpha1, a9,  yl3	LFPDUX	a9,  A1, INC2	fxcpmadd  ys5, alpha1, a13, yl4	LFPDUX	a13, A1, INC2	fxcsmadd  ys2, alpha1, a2,  ys2	LFPDUX	a2,  A2, INC2	fxcsmadd  ys3, alpha1, a6,  ys3	LFPDUX	a6,  A2, INC2	fxcsmadd  ys4, alpha1, a10, ys4	LFPDUX	a10, A2, INC2	fxcsmadd  ys5, alpha1, a14, ys5	LFPDUX	a14, A2, INC2	fmr	yl1, yl5	LFXDUX	yl2, YL, INCY2	fmr	ys1, ys2	LFXDUX	yl3, YL, INCY2	fmr	ys2, ys3 	LFXDUX	yl4, YL, INCY2	fmr	ys3, ys4	LFXDUX	yl5, YL, INCY2	fmr	ys4, ys5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	fsmr	yl1, yl2	STFXDUX	ys3, YS, INCY2	fsmr	yl2, yl3	STFXDUX	ys4, YS, INCY2	fsmr	yl3, yl4	fsmr	yl4, yl5	bdnz	.L52	.align 4.L53:	fxcpmadd  ys2, alpha1, a1,  yl1	fxcpmadd  ys3, alpha1, a5,  yl2	fxcpmadd  ys4, alpha1, a9,  yl3	fxcpmadd  ys5, alpha1, a13, yl4	fxcsmadd  ys2, alpha1, a2,  ys2	fxcsmadd  ys3, alpha1, a6,  ys3	fxcsmadd  ys4, alpha1, a10, ys4	fxcsmadd  ys5, alpha1, a14, ys5	fmr	yl1, yl5	fmr	ys1, ys2	fmr	ys2, ys3	fmr	ys3, ys4	fmr	ys4, ys5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	STFXDUX	ys3, YS, INCY2	STFXDUX	ys4, YS, INCY2	.align 4.L55:	andi.	r0, M, 7	ble	.L59	andi.	r0, M, 4	ble	.L57	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a6,  A2, INC2	fsmr	yl1, yl2	fsmr	yl2, yl3	fxcpmadd  ys2, alpha1, a1, yl1	fxcsmadd  ys2, alpha1, a2, ys2	fxcpmadd  ys3, alpha1, a5, yl2	fxcsmadd  ys3, alpha1, a6, ys3	fmr	yl1, yl3	fmr	ys1, ys2	fmr	ys2, ys3	STFXDUX	ys1, YS, INCY2	STFXDUX	ys2, YS, INCY2	fsmr	  ys1, ys3	.align 4.L57:	andi.	r0, M, 2	ble	.L58	LFXDUX	yl2, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a2,  A2, INC2	fsmr	yl1, yl2	fxcpmadd  ys2, alpha1, a1, yl1	fxcsmadd  ys2, alpha1, a2, ys2	fmr	yl1, yl2	fmr	ys1, ys2	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys2	.align 4.L58:	andi.	r0, M, 1	ble	.L59	LFDUX	a1,  A1, INC2	LFDUX	a2,  A2, INC2	fxmr	alpha2, alpha1	fmadd	ys1, alpha1, a1, yl1	fmadd	ys1, alpha2, a2, ys1	STFXDUX	ys1, YS, INCY2	b	.L60	.align 4.L59:	STFSDUX	ys1, YS, INCY2	.align 4.L60:	andi.	J, N, 1	ble	.L999	LFDUX	alpha1, X, INCX	mr	A1, A	mr	YL, Y	sub	YS, Y, INCY2	fmul	alpha1, alpha, alpha1	LFSDX	ys1, YS, INCY2	LFDX	yl1, YL, INCY	srawi.	r0,  M, 3	mtspr	CTR, r0	ble	.L65	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2	LFXDUX	yl4, YL, INCY2	LFXDUX	yl5, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	LFPDUX	a9,  A1, INC2	LFPDUX	a13, A1, INC2	fsmr	yl1, yl2	fsmr	yl2, yl3	fsmr	yl3, yl4	fsmr	yl4, yl5	bdz	.L63	.align 4.L62:	fxcpmadd  ys2, alpha1, a1,  yl1	LFPDUX	a1,  A1, INC2	fxcpmadd  ys3, alpha1, a5,  yl2	LFXDUX	yl2, YL, INCY2	fxcpmadd  ys4, alpha1, a9,  yl3	LFXDUX	yl3, YL, INCY2	fxcpmadd  ys5, alpha1, a13, yl4	LFXDUX	yl4, YL, INCY2	fmr	yl1, yl5	LFXDUX	yl5, YL, INCY2	fmr	ys1, ys2	LFPDUX	a5,  A1, INC2	fmr	ys2, ys3	LFPDUX	a9,  A1, INC2	fmr	ys3, ys4	LFPDUX	a13, A1, INC2	fmr	ys4, ys5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	fsmr	yl1, yl2	STFXDUX	ys3, YS, INCY2	fsmr	yl2, yl3	STFXDUX	ys4, YS, INCY2	fsmr	yl3, yl4	fsmr	yl4, yl5	bdnz	.L62	.align 4.L63:	fxcpmadd  ys2, alpha1, a1,  yl1	fxcpmadd  ys3, alpha1, a5,  yl2	fxcpmadd  ys4, alpha1, a9,  yl3	fxcpmadd  ys5, alpha1, a13, yl4	fmr	yl1, yl5	fmr	ys1, ys2	fmr	ys2, ys3	fmr	ys3, ys4	fmr	ys4, ys5	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys5	STFXDUX	ys2, YS, INCY2	STFXDUX	ys3, YS, INCY2	STFXDUX	ys4, YS, INCY2	.align 4.L65:	andi.	r0, M, 7	ble	.L69	andi.	r0, M, 4	ble	.L67	LFXDUX	yl2, YL, INCY2	LFXDUX	yl3, YL, INCY2	LFPDUX	a1,  A1, INC2	LFPDUX	a5,  A1, INC2	fsmr	yl1, yl2	fsmr	yl2, yl3	fxcpmadd  ys2, alpha1, a1, yl1	fxcpmadd  ys3, alpha1, a5, yl2	fmr	yl1, yl3	fmr	ys1, ys2	fmr	ys2, ys3	STFXDUX	ys1, YS, INCY2	fsmr	  ys1, ys3	STFXDUX	ys2, YS, INCY2	.align 4.L67:	andi.	r0, M, 2	ble	.L68	LFPDUX	a1,  A1, INC2	LFXDUX	yl2, YL, INCY2	fsmr	yl1, yl2	fxcpmadd  ys2, alpha1, a1, yl1	fmr	yl1, yl2	fmr	ys1, ys2	STFXDUX	ys1, YS, INCY2	fsmr	ys1, ys2	.align 4.L68:	andi.	r0, M, 1	ble	.L69	LFDUX	a1,  A1, INC2	fmadd  ys1, alpha1, a1, yl1	STFXDUX	ys1, YS, INCY2	b	.L999	.align 4.L69:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -