⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zgemv_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	/*
	 * Tail of the kernel: two-column remainder handling, the single
	 * leftover column, the strided write-back of y, and the return.
	 *
	 * This excerpt starts in the middle of an unrolled loop.  The loop head,
	 * label .LL21, the PROLOGUE, the register aliases (A, A1, A2, X, Y, YY,
	 * Y1, BUFFER, M, N, I, J, INCY, a1..a16, x1..x4, y1..y8, t1..t4) and the
	 * LDF/STF/FMUL/FADD/FSUB/FADDX/FSUBX macros are defined outside it.
	 * NOTE(review): FADDX/FSUBX presumably select add/sub according to the
	 * conjugation variant being built - confirm against their definitions.
	 *
	 * Formatting only: one statement per line, labels and preprocessor
	 * directives on their own lines.  Instruction order is unchanged because
	 * the code is hand-scheduled and relies on SPARC delay slots: the
	 * instruction that follows every branch/return below executes before the
	 * transfer takes effect (no annul bit is used anywhere in this excerpt).
	 *
	 * Data layout visible here: every element is a pair of SIZE-wide values,
	 * (aN, aN+1) / (x1, x2) / (yN, yN+1), combined as
	 *     y_lo += a_lo * x1  -X  a_hi * x2
	 *     y_hi += a_lo * x2  +X  a_hi * x1
	 * where -X / +X stand for FSUBX / FADDX.
	 */

	/* End of the preceding unrolled iteration (its start is not visible):
	   finish y7/y8, preload a1, a3 and y3 for the next step, advance Y1 and
	   store y5..y8 through the already-advanced pointer (negative offsets). */
	LDF	[A1 +  8 * SIZE], a1
	FSUBX	y7, t3, y7
	FMUL	a3, x1, t3
	LDF	[Y1 + 10 * SIZE], y3
	FADDX	y8, t4, y8
	add	Y1, 8 * SIZE, Y1
	FMUL	a3, x2, t4
	LDF	[A1 + 10 * SIZE], a3
	STF	y5, [Y1 -  4 * SIZE]
	STF	y6, [Y1 -  3 * SIZE]
	STF	y7, [Y1 -  2 * SIZE]
	STF	y8, [Y1 -  1 * SIZE]

	/* .LL26: final group of four elements for the column pair (A1 with
	   x1/x2, A2 with x3/x4).  No further loads from A are issued here; the
	   a-registers and t1..t4 on entry were produced by code that is not
	   visible.  y4..y8 are still loaded from Y1 + 3..7 in this block. */
.LL26:
	FADD	y1, t1, y1
	LDF	[Y1 +  3 * SIZE], y4
	FMUL	a2, x2, t1

	FADD	y2, t2, y2
	FMUL	a2, x1, t2

	FADD	y3, t3, y3
	LDF	[Y1 +  4 * SIZE], y5
	FMUL	a4, x2, t3

	FADD	y4, t4, y4
	FMUL	a4, x1, t4

	FSUBX	y1, t1, y1
	LDF	[Y1 +  5 * SIZE], y6
	FMUL	a5, x3, t1

	FADDX	y2, t2, y2
	FMUL	a5, x4, t2

	FSUBX	y3, t3, y3
	LDF	[Y1 +  6 * SIZE], y7

	FADDX	y4, t4, y4
	FMUL	a7, x4, t4

	FADD	y1, t1, y1
	LDF	[Y1 +  7 * SIZE], y8
	FMUL	a7, x3, t3		/* t3 is free again: FSUBX y3 above consumed it */
	FMUL	a6, x4, t1

	FADD	y2, t2, y2
	FMUL	a6, x3, t2

	FADD	y3, t3, y3
	FMUL	a8, x4, t3

	FADD	y4, t4, y4
	FMUL	a8, x3, t4

	FSUBX	y1, t1, y1
	FMUL	a9,  x1, t1

	FADDX	y2, t2, y2
	FMUL	a9,  x2, t2

	FSUBX	y3, t3, y3
	FMUL	a11, x1, t3

	FADDX	y4, t4, y4
	FMUL	a11, x2, t4

	FADD	y5, t1, y5
	FMUL	a10, x2, t1

	FADD	y6, t2, y6
	FMUL	a10, x1, t2

	FADD	y7, t3, y7
	FMUL	a12, x2, t3

	FADD	y8, t4, y8
	FMUL	a12, x1, t4

	FSUBX	y5, t1, y5
	FMUL	a13, x3, t1

	FADDX	y6, t2, y6
	FMUL	a13, x4, t2

	FSUBX	y7, t3, y7
	FMUL	a15, x3, t3

	FADDX	y8, t4, y8
	FMUL	a15, x4, t4

	FADD	y5, t1, y5
	FMUL	a14, x4, t1

	FADD	y6, t2, y6
	FMUL	a14, x3, t2

	FADD	y7, t3, y7
	FMUL	a16, x4, t3

	FADD	y8, t4, y8
	FMUL	a16, x3, t4

	/* Stores of finished values are interleaved with the last four
	   accumulations and with the pointer updates. */
	STF	y1, [Y1 + 0 * SIZE]
	FSUBX	y5, t1, y5
	STF	y2, [Y1 + 1 * SIZE]
	FADDX	y6, t2, y6
	STF	y3, [Y1 + 2 * SIZE]
	FSUBX	y7, t3, y7
	STF	y4, [Y1 + 3 * SIZE]
	FADDX	y8, t4, y8

	STF	y5, [Y1 + 4 * SIZE]
	add	A1, 8 * SIZE, A1
	STF	y6, [Y1 + 5 * SIZE]
	add	A2, 8 * SIZE, A2
	STF	y7, [Y1 + 6 * SIZE]
	STF	y8, [Y1 + 7 * SIZE]
	add	Y1, 8 * SIZE, Y1

	/* .LL27: column pair, two leftover elements (taken when M & 2 != 0). */
.LL27:
	andcc	M, 2, I
	ble,pn	%icc, .LL28
	nop

	LDF	[A1 + 0 * SIZE], a1
	LDF	[A1 + 1 * SIZE], a2
	LDF	[A1 + 2 * SIZE], a3
	LDF	[A1 + 3 * SIZE], a4

	LDF	[Y1 + 0 * SIZE], y1
	LDF	[Y1 + 1 * SIZE], y2
	LDF	[Y1 + 2 * SIZE], y3
	LDF	[Y1 + 3 * SIZE], y4

	FMUL	a1, x1, t1
	LDF	[A2 + 0 * SIZE], a5
	FMUL	a1, x2, t2
	LDF	[A2 + 1 * SIZE], a6
	FMUL	a3, x1, t3
	LDF	[A2 + 2 * SIZE], a7
	FMUL	a3, x2, t4
	LDF	[A2 + 3 * SIZE], a8

	FADD	y1, t1, y1
	FMUL	a2, x2, t1
	FADD	y2, t2, y2
	FMUL	a2, x1, t2
	FADD	y3, t3, y3
	FMUL	a4, x2, t3
	FADD	y4, t4, y4
	FMUL	a4, x1, t4

	FSUBX	y1, t1, y1
	FMUL	a5, x3, t1
	FADDX	y2, t2, y2
	FMUL	a5, x4, t2
	FSUBX	y3, t3, y3
	FMUL	a7, x3, t3
	FADDX	y4, t4, y4
	FMUL	a7, x4, t4

	FADD	y1, t1, y1
	FMUL	a6, x4, t1
	FADD	y2, t2, y2
	FMUL	a6, x3, t2
	FADD	y3, t3, y3
	FMUL	a8, x4, t3
	FADD	y4, t4, y4
	FMUL	a8, x3, t4

	FSUBX	y1, t1, y1
	FADDX	y2, t2, y2
	FSUBX	y3, t3, y3
	FADDX	y4, t4, y4

	STF	y1, [Y1 + 0 * SIZE]
	add	A1, 4 * SIZE, A1
	STF	y2, [Y1 + 1 * SIZE]
	add	A2, 4 * SIZE, A2
	STF	y3, [Y1 + 2 * SIZE]
	nop
	STF	y4, [Y1 + 3 * SIZE]
	add	Y1, 4 * SIZE, Y1

	/* .LL28: column pair, one leftover element (taken when M & 1 != 0).
	   A1, A2 and Y1 are not advanced afterwards in this block. */
.LL28:
	andcc	M, 1, I
	ble,pn	%icc, .LL29
	nop

	LDF	[A1 + 0 * SIZE], a1
	LDF	[A1 + 1 * SIZE], a2
	LDF	[A2 + 0 * SIZE], a3
	LDF	[A2 + 1 * SIZE], a4

	LDF	[Y1 + 0 * SIZE], y1
	LDF	[Y1 + 1 * SIZE], y2

	FMUL	a1, x1, t1
	FMUL	a1, x2, t2
	FMUL	a2, x2, t3
	FMUL	a2, x1, t4

	FADD	y1, t1, y1
	FMUL	a3, x3, t1
	FADD	y2, t2, y2
	FMUL	a3, x4, t2

	FSUBX	y1, t3, y1
	FMUL	a4, x4, t3
	FADDX	y2, t4, y2
	FMUL	a4, x3, t4

	FADD	y1, t1, y1
	FADD	y2, t2, y2
	FSUBX	y1, t3, y1
	FADDX	y2, t4, y2

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]

	/* .LL29: count down J and branch back to .LL21 (defined outside this
	   excerpt) while J > 0. */
.LL29:
	deccc	J
	bg	%icc, .LL21
	nop

	/* .LL30: if bit 0 of N is clear there is no single leftover column;
	   go straight to the write-back/return code. */
.LL30:
	andcc	N, 1, J
	ble,pn	%icc, .LL990
	nop

	/* .LL31: single leftover column.  Restart Y1 at YY and A1 at A, reload
	   alpha from its stack slots and form the scaled x element:
	       !XCONJ: x1 = AC - BD,  x2 = AD + BC
	        XCONJ: x1 = AC + BD,  x2 = AD - BC
	   with A = ALPHA_R, B = ALPHA_I... as labelled by the original
	   per-instruction comments (AC = ALPHA_R * x1, etc.). */
.LL31:
	mov	YY, Y1
	mov	A,  A1

	LDF	STACK_ALPHA_R, ALPHA_R
	LDF	STACK_ALPHA_I, ALPHA_I

	LDF	[X + 0 * SIZE], x1
	LDF	[X + 1 * SIZE], x2

	FMUL	ALPHA_R, x1, a1		/* AC */
	FMUL	ALPHA_I, x1, a2		/* AD */
	FMUL	ALPHA_R, x2, a3		/* BC */
	FMUL	ALPHA_I, x2, a4		/* BD */

#ifndef XCONJ
	FSUB	a1, a4, x1
	FADD	a2, a3, x2
#else
	FADD	a1, a4, x1
	FSUB	a2, a3, x2
#endif

	/* I = M >> 2 groups of four elements; skip the unrolled part if none. */
	sra	M, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL37
	nop

	/* Preload the first group of A (a1..a4, a9..a12) and of y (y1..y8). */
	LDF	[A1 + 0 * SIZE], a1
	LDF	[A1 + 1 * SIZE], a2
	LDF	[A1 + 2 * SIZE], a3
	LDF	[A1 + 3 * SIZE], a4
	LDF	[A1 + 4 * SIZE], a9
	LDF	[A1 + 5 * SIZE], a10
	LDF	[A1 + 6 * SIZE], a11
	LDF	[A1 + 7 * SIZE], a12

	LDF	[Y1 + 0 * SIZE], y1
	LDF	[Y1 + 1 * SIZE], y2
	LDF	[Y1 + 2 * SIZE], y3
	LDF	[Y1 + 3 * SIZE], y4
	LDF	[Y1 + 4 * SIZE], y5
	LDF	[Y1 + 5 * SIZE], y6
	LDF	[Y1 + 6 * SIZE], y7
	LDF	[Y1 + 7 * SIZE], y8

	/* Start the first products, then look ahead: a1 and a3 are reloaded
	   from the next group before the loop-entry test.
	   NOTE(review): when the branch to .LL33 is taken (and on the last
	   pass of .LL32) these look-ahead values of a1/a3 are never used, yet
	   the loads at A1 + 8 and A1 + 10 are still issued - confirm that
	   reading up to 11 * SIZE past the current group is always safe. */
	FMUL	a1, x1, t1
	deccc	I
	FMUL	a1, x2, t2
	LDF	[A1 +  8 * SIZE], a1
	FMUL	a3, x1, t3
	FMUL	a3, x2, t4
	ble,pn	%icc, .LL33
	LDF	[A1 + 10 * SIZE], a3	/* delay slot */

	/* .LL32: steady-state loop, one group of four elements per pass.
	   Finishes the current group while loading A and y for the next one. */
.LL32:
	FADD	y1, t1, y1
	prefetch  [A1 +  PREFETCHSIZE * SIZE], 1
	FMUL	a2, x2, t1

	FADD	y2, t2, y2
	FMUL	a2, x1, t2
	LDF	[A1 +  9 * SIZE], a2

	FADD	y3, t3, y3
	FMUL	a4, x2, t3

	FADD	y4, t4, y4
	FMUL	a4, x1, t4
	LDF	[A1 + 11 * SIZE], a4

	FSUBX	y1, t1, y1
	FMUL	a9,  x1, t1

	FADDX	y2, t2, y2
	FMUL	a9,  x2, t2
	LDF	[A1 + 12 * SIZE], a9

	FSUBX	y3, t3, y3
	FMUL	a11, x1, t3

	FADDX	y4, t4, y4
	FMUL	a11, x2, t4
	LDF	[A1 + 14 * SIZE], a11

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]
	STF	y3, [Y1 + 2 * SIZE]
	STF	y4, [Y1 + 3 * SIZE]

	FADD	y5, t1, y5
	FMUL	a10, x2, t1
	LDF	[Y1 +  8 * SIZE], y1

	FADD	y6, t2, y6
	FMUL	a10, x1, t2
	LDF	[A1 + 13 * SIZE], a10

	FADD	y7, t3, y7
	deccc	I			/* sets the flags tested by bg below; the
					   intervening add instructions leave them alone */
	FMUL	a12, x2, t3
	LDF	[Y1 +  9 * SIZE], y2

	FADD	y8, t4, y8
	FMUL	a12, x1, t4
	LDF	[A1 + 15 * SIZE], a12

	FSUBX	y5, t1, y5
	add	A1, 8 * SIZE, A1	/* A offsets below are relative to the new A1 */
	FMUL	a1, x1, t1
	LDF	[Y1 + 10 * SIZE], y3

	FADDX	y6, t2, y6
	FMUL	a1, x2, t2
	LDF	[A1 +  8 * SIZE], a1

	FSUBX	y7, t3, y7
	FMUL	a3, x1, t3
	LDF	[Y1 + 11 * SIZE], y4

	FADDX	y8, t4, y8
	FMUL	a3, x2, t4
	LDF	[A1 + 10 * SIZE], a3

	STF	y5, [Y1 + 4 * SIZE]
	STF	y6, [Y1 + 5 * SIZE]
	STF	y7, [Y1 + 6 * SIZE]
	STF	y8, [Y1 + 7 * SIZE]

	LDF	[Y1 + 12 * SIZE], y5
	LDF	[Y1 + 13 * SIZE], y6
	LDF	[Y1 + 14 * SIZE], y7
	add	Y1, 8 * SIZE, Y1
	bg,pn	%icc, .LL32
	LDF	[Y1 +  7 * SIZE], y8	/* delay slot; Y1 already advanced */

	/* .LL33: last group of four for the single column; no look-ahead. */
.LL33:
	FADD	y1, t1, y1
	FMUL	a2, x2, t1
	FADD	y2, t2, y2
	FMUL	a2, x1, t2

	FADD	y3, t3, y3
	FMUL	a4, x2, t3
	FADD	y4, t4, y4
	FMUL	a4, x1, t4

	FSUBX	y1, t1, y1
	FMUL	a9,  x1, t1
	FADDX	y2, t2, y2
	FMUL	a9,  x2, t2

	FSUBX	y3, t3, y3
	FMUL	a11, x1, t3
	FADDX	y4, t4, y4
	FMUL	a11, x2, t4

	FADD	y5, t1, y5
	FMUL	a10, x2, t1
	FADD	y6, t2, y6
	FMUL	a10, x1, t2

	FADD	y7, t3, y7
	FMUL	a12, x2, t3
	FADD	y8, t4, y8
	FMUL	a12, x1, t4

	FSUBX	y5, t1, y5
	FADDX	y6, t2, y6
	FSUBX	y7, t3, y7
	FADDX	y8, t4, y8

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]
	STF	y3, [Y1 + 2 * SIZE]
	STF	y4, [Y1 + 3 * SIZE]
	STF	y5, [Y1 + 4 * SIZE]
	STF	y6, [Y1 + 5 * SIZE]
	STF	y7, [Y1 + 6 * SIZE]
	STF	y8, [Y1 + 7 * SIZE]

	add	A1, 8 * SIZE, A1
	add	Y1, 8 * SIZE, Y1

	/* .LL37: single column, two leftover elements (M & 2 != 0). */
.LL37:
	andcc	M, 2, I
	ble,pn	%icc, .LL38
	nop

	LDF	[A1 + 0 * SIZE], a1
	LDF	[A1 + 1 * SIZE], a2
	LDF	[A1 + 2 * SIZE], a3
	LDF	[A1 + 3 * SIZE], a4

	LDF	[Y1 + 0 * SIZE], y1
	FMUL	a1, x1, t1
	LDF	[Y1 + 1 * SIZE], y2
	FMUL	a1, x2, t2
	LDF	[Y1 + 2 * SIZE], y3
	FMUL	a3, x1, t3
	LDF	[Y1 + 3 * SIZE], y4
	FMUL	a3, x2, t4

	FADD	y1, t1, y1
	FMUL	a2, x2, t1
	FADD	y2, t2, y2
	FMUL	a2, x1, t2
	FADD	y3, t3, y3
	FMUL	a4, x2, t3
	FADD	y4, t4, y4
	FMUL	a4, x1, t4

	FSUBX	y1, t1, y1
	FADDX	y2, t2, y2
	FSUBX	y3, t3, y3
	FADDX	y4, t4, y4

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]
	STF	y3, [Y1 + 2 * SIZE]
	STF	y4, [Y1 + 3 * SIZE]

	add	A1, 4 * SIZE, A1
	add	Y1, 4 * SIZE, Y1

	/* .LL38: single column, one leftover element (M & 1 != 0). */
.LL38:
	andcc	M, 1, I
	ble,pn	%icc, .LL990
	nop

	LDF	[A1 + 0 * SIZE], a1
	LDF	[A1 + 1 * SIZE], a2

	LDF	[Y1 + 0 * SIZE], y1
	LDF	[Y1 + 1 * SIZE], y2

	FMUL	a1, x1, t1
	FMUL	a1, x2, t2
	FMUL	a2, x2, t3
	FMUL	a2, x1, t4

	FADD	y1, t1, y1
	FADD	y2, t2, y2
	FSUBX	y1, t3, y1
	FADDX	y2, t4, y2

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]

	/* .LL990: write-back.  When INCY equals 2 * SIZE nothing more is done.
	   Otherwise each pair read from BUFFER is added to the pair read from Y
	   and stored through Y1; Y (read cursor) and Y1 (write cursor) both
	   start at the same address and step by INCY.
	   NOTE(review): presumably the loops above accumulated into BUFFER
	   (via YY) in the strided case - confirm against the prologue. */
.LL990:
	cmp	INCY, 2 * SIZE
	be	%icc, .LL999
	mov	Y, Y1			/* delay slot: executed on both paths */

	sra	M, 2, I
	cmp	I, 0
	ble,pn	%icc, .LL995
	nop

	/* .LL991: four elements per pass. */
.LL991:
	LDF	[BUFFER +  0 * SIZE], a1
	LDF	[BUFFER +  1 * SIZE], a2
	LDF	[Y + 0 * SIZE], y1
	LDF	[Y + 1 * SIZE], y2
	add	Y, INCY, Y

	LDF	[BUFFER +  2 * SIZE], a3
	LDF	[BUFFER +  3 * SIZE], a4
	LDF	[Y + 0 * SIZE], y3
	LDF	[Y + 1 * SIZE], y4
	add	Y, INCY, Y

	LDF	[BUFFER +  4 * SIZE], a5
	LDF	[BUFFER +  5 * SIZE], a6
	LDF	[Y + 0 * SIZE], y5
	LDF	[Y + 1 * SIZE], y6
	add	Y, INCY, Y

	LDF	[BUFFER +  6 * SIZE], a7
	LDF	[BUFFER +  7 * SIZE], a8
	LDF	[Y + 0 * SIZE], y7
	LDF	[Y + 1 * SIZE], y8
	add	Y, INCY, Y

	FADD	y1, a1, y1
	FADD	y2, a2, y2
	FADD	y3, a3, y3
	FADD	y4, a4, y4
	FADD	y5, a5, y5
	FADD	y6, a6, y6
	FADD	y7, a7, y7
	FADD	y8, a8, y8

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1
	STF	y3, [Y1 + 0 * SIZE]
	STF	y4, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1
	STF	y5, [Y1 + 0 * SIZE]
	STF	y6, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1
	STF	y7, [Y1 + 0 * SIZE]
	STF	y8, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1

	deccc	I
	bg,pn	%icc, .LL991
	add	BUFFER, 8 * SIZE, BUFFER	/* delay slot */

	/* .LL995: two leftover elements (M & 2 != 0). */
.LL995:
	andcc	M, 2, I
	ble,pn	%icc, .LL996
	nop

	LDF	[BUFFER +  0 * SIZE], a1
	LDF	[BUFFER +  1 * SIZE], a2
	LDF	[Y + 0 * SIZE], y1
	LDF	[Y + 1 * SIZE], y2
	add	Y, INCY, Y

	LDF	[BUFFER +  2 * SIZE], a3
	LDF	[BUFFER +  3 * SIZE], a4
	LDF	[Y + 0 * SIZE], y3
	LDF	[Y + 1 * SIZE], y4
	add	Y, INCY, Y

	FADD	y1, a1, y1
	FADD	y2, a2, y2
	FADD	y3, a3, y3
	FADD	y4, a4, y4

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1
	STF	y3, [Y1 + 0 * SIZE]
	STF	y4, [Y1 + 1 * SIZE]
	add	Y1, INCY, Y1
	add	BUFFER, 4 * SIZE, BUFFER

	/* .LL996: one leftover element (M & 1 != 0). */
.LL996:
	andcc	M, 1, I
	ble,pn	%icc, .LL999
	nop

	LDF	[BUFFER +  0 * SIZE], a1
	LDF	[BUFFER +  1 * SIZE], a2
	LDF	[Y + 0 * SIZE], y1
	LDF	[Y + 1 * SIZE], y2

	FADD	y1, a1, y1
	FADD	y2, a2, y2

	STF	y1, [Y1 + 0 * SIZE]
	STF	y2, [Y1 + 1 * SIZE]

	/* .LL999: return to the caller; %o0 is cleared in the delay slot. */
.LL999:
	return	%i7 + 8
	clr	%o0

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -