⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_n.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	LDF	[A2 + 1 * SIZE], a6	LDF	[A3 + 1 * SIZE], a7	LDF	[A4 + 1 * SIZE], a8	LDF	[Y1 + 1 * SIZE], y2	add	A1, 2 * SIZE, A1	add	A2, 2 * SIZE, A2	add	A3, 2 * SIZE, A3	add	A4, 2 * SIZE, A4	FMUL	a1, x1, t1	FMUL	a2, x2, t2	FMUL	a3, x3, t3	FMUL	a4, x4, t4	FADD	y1, t1, y1	FMUL	a5, x1, t1	FADD	y1, t2, y1	FMUL	a6, x2, t2	FADD	y1, t3, y1	FMUL	a7, x3, t3	FADD	y1, t4, y1	FMUL	a8, x4, t4	FADD	y2, t1, y2	FADD	y2, t2, y2	FADD	y2, t3, y2	FADD	y2, t4, y2	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	add	Y1, 2 * SIZE, Y1.LL18:	andcc	M, 1, I	ble,pn	%icc, .LL19	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[A2 + 0 * SIZE], a2	LDF	[A3 + 0 * SIZE], a3	LDF	[A4 + 0 * SIZE], a4	LDF	[Y1 + 0 * SIZE], y1	FMUL	a1, x1, t1	FMUL	a2, x2, t2	FMUL	a3, x3, t3	FMUL	a4, x4, t4	FADD	y1, t1, y1	FADD	y1, t2, y1	FADD	y1, t3, y1	FADD	y1, t4, y1	STF	y1, [Y1].LL19:	deccc	J	bg	%icc, .LL11	nop.LL20:	andcc	N, 2, J	ble,pn	%icc, .LL30	nop.LL21:	mov	YY, Y1	mov	A,  A1	add	A,  LDA, A2	add	A2, LDA, A	LDF	STACK_ALPHA, ALPHA	LDF	[X], x1	add	X, INCX, X	LDF	[X], x2	add	X, INCX, X	FMUL	ALPHA, x1, x1	FMUL	ALPHA, x2, x2	sra	M, 3, I	cmp	I, 0	ble,pn	%icc, .LL26	nop	LDF	[Y1 + 0 * SIZE], y1	LDF	[Y1 + 1 * SIZE], y2	LDF	[Y1 + 2 * SIZE], y3	LDF	[Y1 + 3 * SIZE], y4	LDF	[Y1 + 4 * SIZE], y5	LDF	[Y1 + 5 * SIZE], y6	LDF	[Y1 + 6 * SIZE], y7	LDF	[Y1 + 7 * SIZE], y8	LDF	[A1 + 0 * SIZE], a1	LDF	[A1 + 1 * SIZE], a2	LDF	[A1 + 2 * SIZE], a3	LDF	[A1 + 3 * SIZE], a4	LDF	[A1 + 4 * SIZE], a5	LDF	[A1 + 5 * SIZE], a6	LDF	[A1 + 6 * SIZE], a7	LDF	[A1 + 7 * SIZE], a8	LDF	[A2 + 0 * SIZE], a9	LDF	[A2 + 1 * SIZE], a10	LDF	[A2 + 2 * SIZE], a11	LDF	[A2 + 3 * SIZE], a12	LDF	[A2 + 4 * SIZE], a13	LDF	[A2 + 5 * SIZE], a14	LDF	[A2 + 6 * SIZE], a15	LDF	[A2 + 7 * SIZE], a16	FMUL	a1,  x1, t1	deccc	I	LDF	[A1 +  8 * SIZE], a1	FMUL	a2,  x1, t2	LDF	[A1 +  9 * SIZE], a2	FMUL	a3,  x1, t3	LDF	[A1 + 10 * SIZE], a3	FMUL	a4,  x1, t4	ble,pn	%icc, .LL23	LDF	[A1 + 11 * SIZE], a4.LL22:	FADD	y1,  t1, y1	prefetch  [A1 +  PREFETCHSIZE * SIZE], 1	FMUL	a5,  x1, t1	LDF	[A1 + 12 * SIZE], a5	FADD	y2,  t2, y2	FMUL	a6,  x1, t2	LDF	[A1 + 13 * SIZE], a6	FADD	y3,  t3, y3	FMUL	a7,  x1, t3	LDF	[A1 + 14 * SIZE], a7	FADD	y4,  t4, y4	FMUL	a8,  x1, t4	LDF	[A1 + 15 * SIZE], a8	FADD	y5,  t1, y5	FMUL	a9,  x2, t1	LDF	[A2 +  8 * SIZE], a9	FADD	y6,  t2, y6	FMUL	a10, x2, t2	LDF	[A2 +  9 * SIZE], a10	FADD	y7,  t3, y7	FMUL	a11, x2, t3	LDF	[A2 + 10 * SIZE], a11	FADD	y8,  t4, y8	FMUL	a12, x2, t4	LDF	[A2 + 11 * SIZE], a12	FADD	y1,  t1, y1	prefetch  [A2 +  PREFETCHSIZE * SIZE], 1	FMUL	a13, x2, t1	LDF	[A2 + 12 * SIZE], a13	FADD	y2,  t2, y2	FMUL	a14, x2, t2	LDF	[A2 + 13 * SIZE], a14	FADD	y3,  t3, y3	FMUL	a15, x2, t3	LDF	[A2 + 14 * SIZE], a15	FADD	y4,  t4, y4	FMUL	a16, x2, t4	LDF	[A2 + 15 * SIZE], a16	FADD	y5,  t1, y5	FMUL	a1,  x1, t1	LDF	[A1 + 16 * SIZE], a1	FADD	y6,  t2, y6	FMUL	a2,  x1, t2	LDF	[A1 + 17 * SIZE], a2	FADD	y7,  t3, y7	FMUL	a3,  x1, t3	LDF	[A1 + 18 * SIZE], a3	FADD	y8,  t4, y8	FMUL	a4,  x1, t4	LDF	[A1 + 19 * SIZE], a4	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	STF	y3, [Y1 + 2 * SIZE]	STF	y4, [Y1 + 3 * SIZE]	STF	y5, [Y1 + 4 * SIZE]	STF	y6, [Y1 + 5 * SIZE]	STF	y7, [Y1 + 6 * SIZE]	STF	y8, [Y1 + 7 * SIZE]	LDF	[Y1 +  8 * SIZE], y1	add	A1, 8 * SIZE, A1	LDF	[Y1 +  9 * SIZE], y2	add	A2, 8 * SIZE, A2	LDF	[Y1 + 10 * SIZE], y3	deccc	I	LDF	[Y1 + 11 * SIZE], y4	LDF	[Y1 + 12 * SIZE], y5	LDF	[Y1 + 13 * SIZE], y6	LDF	[Y1 + 14 * SIZE], y7	LDF	[Y1 + 15 * SIZE], y8	bg,pn	%icc, .LL22	add	Y1, 8 * SIZE, Y1.LL23:	FADD	y1,  t1, y1	FMUL	a5,  x1, t1	FADD	y2,  t2, y2	FMUL	a6,  x1, t2	FADD	y3,  t3, y3	FMUL	a7,  x1, t3	FADD	y4,  t4, y4	FMUL	a8,  x1, t4	FADD	y5,  t1, y5	FMUL	a9,  x2, t1	FADD	y6,  t2, y6	FMUL	a10, x2, t2	FADD	y7,  t3, y7	FMUL	a11, x2, t3	FADD	y8,  t4, y8	FMUL	a12, x2, t4	FADD	y1,  t1, y1	FMUL	a13, x2, t1	FADD	y2,  t2, y2	FMUL	a14, x2, t2	FADD	y3,  t3, y3	FMUL	a15, x2, t3	FADD	y4,  t4, y4	FMUL	a16, x2, t4	STF	y1, [Y1 + 0 * SIZE]	FADD	y5,  t1, y5	STF	y2, [Y1 + 1 * SIZE]	FADD	y6,  t2, y6	STF	y3, [Y1 + 2 * SIZE]	FADD	y7,  t3, y7	STF	y4, [Y1 + 3 * SIZE]	FADD	y8,  t4, y8	STF	y5, [Y1 + 4 * SIZE]	add	A1, 8 * SIZE, A1	STF	y6, [Y1 + 5 * SIZE]	add	A2, 8 * SIZE, A2	STF	y7, [Y1 + 6 * SIZE]	nop	STF	y8, [Y1 + 7 * SIZE]	add	Y1, 8 * SIZE, Y1.LL26:	andcc	M, 4, I	ble,pn	%icc, .LL27	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[A1 + 1 * SIZE], a2	LDF	[A1 + 2 * SIZE], a3	LDF	[A1 + 3 * SIZE], a4	LDF	[A2 + 0 * SIZE], a5	LDF	[A2 + 1 * SIZE], a6	LDF	[A2 + 2 * SIZE], a7	LDF	[A2 + 3 * SIZE], a8	LDF	[Y1 + 0 * SIZE], y1	add	A1, 4 * SIZE, A1	LDF	[Y1 + 1 * SIZE], y2	add	A2, 4 * SIZE, A2	LDF	[Y1 + 2 * SIZE], y3	LDF	[Y1 + 3 * SIZE], y4	FMUL	a1,  x1, t1	FMUL	a2,  x1, t2	FMUL	a3,  x1, t3	FMUL	a4,  x1, t4	FADD	y1,  t1, y1	FMUL	a5,  x2, t1	FADD	y2,  t2, y2	FMUL	a6,  x2, t2	FADD	y3,  t3, y3	FMUL	a7,  x2, t3	FADD	y4,  t4, y4	FMUL	a8,  x2, t4	FADD	y1,  t1, y1	FADD	y2,  t2, y2	FADD	y3,  t3, y3	FADD	y4,  t4, y4	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	STF	y3, [Y1 + 2 * SIZE]	STF	y4, [Y1 + 3 * SIZE]	add	Y1, 4 * SIZE, Y1.LL27:	andcc	M, 2, I	ble,pn	%icc, .LL28	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[A2 + 0 * SIZE], a2	LDF	[Y1 + 0 * SIZE], y1	LDF	[A1 + 1 * SIZE], a5	LDF	[A2 + 1 * SIZE], a6	add	A1, 2 * SIZE, A1	LDF	[Y1 + 1 * SIZE], y2	add	A2, 2 * SIZE, A2	FMUL	a1, x1, t1	FMUL	a2, x2, t2	FADD	y1, t1, y1	FMUL	a5, x1, t1	FADD	y1, t2, y1	FMUL	a6, x2, t2	FADD	y2, t1, y2	FADD	y2, t2, y2	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	add	Y1, 2 * SIZE, Y1.LL28:	andcc	M, 1, I	ble,pn	%icc, .LL30	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[A2 + 0 * SIZE], a2	LDF	[Y1 + 0 * SIZE], y1	FMUL	a1, x1, t1	FMUL	a2, x2, t2	FADD	y1, t1, y1	FADD	y1, t2, y1	STF	y1, [Y1].LL30:	andcc	N, 1, J	ble,pn	%icc, .LL990	nop.LL31:	mov	YY, Y1	mov	A,  A1	LDF	STACK_ALPHA, ALPHA	LDF	[X], x1	add	X, INCX, X	FMUL	ALPHA, x1, x1	sra	M, 3, I	cmp	I, 0	ble,pn	%icc, .LL36	nop	LDF	[Y1 + 0 * SIZE], y1	LDF	[Y1 + 1 * SIZE], y2	LDF	[Y1 + 2 * SIZE], y3	LDF	[Y1 + 3 * SIZE], y4	LDF	[Y1 + 4 * SIZE], y5	LDF	[Y1 + 5 * SIZE], y6	LDF	[Y1 + 6 * SIZE], y7	LDF	[Y1 + 7 * SIZE], y8	LDF	[A1 + 0 * SIZE], a1	LDF	[A1 + 1 * SIZE], a2	LDF	[A1 + 2 * SIZE], a3	LDF	[A1 + 3 * SIZE], a4	LDF	[A1 + 4 * SIZE], a5	LDF	[A1 + 5 * SIZE], a6	LDF	[A1 + 6 * SIZE], a7	LDF	[A1 + 7 * SIZE], a8	FMUL	a1,  x1, t1	deccc	I	LDF	[A1 +  8 * SIZE], a1	FMUL	a2,  x1, t2	LDF	[A1 +  9 * SIZE], a2	FMUL	a3,  x1, t3	LDF	[A1 + 10 * SIZE], a3	FMUL	a4,  x1, t4	ble,pn	%icc, .LL33	LDF	[A1 + 11 * SIZE], a4.LL32:	FADD	y1,  t1, y1	prefetch  [A1 +  PREFETCHSIZE * SIZE], 1	FMUL	a5,  x1, t1	LDF	[A1 + 12 * SIZE], a5	FADD	y2,  t2, y2	FMUL	a6,  x1, t2	LDF	[A1 + 13 * SIZE], a6	FADD	y3,  t3, y3	FMUL	a7,  x1, t3	LDF	[A1 + 14 * SIZE], a7	FADD	y4,  t4, y4	FMUL	a8,  x1, t4	LDF	[A1 + 15 * SIZE], a8	FADD	y5,  t1, y5	FMUL	a1,  x1, t1	LDF	[A1 + 16 * SIZE], a1	FADD	y6,  t2, y6	FMUL	a2,  x1, t2	LDF	[A1 + 17 * SIZE], a2	FADD	y7,  t3, y7	FMUL	a3,  x1, t3	LDF	[A1 + 18 * SIZE], a3	FADD	y8,  t4, y8	FMUL	a4,  x1, t4	LDF	[A1 + 19 * SIZE], a4	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	STF	y3, [Y1 + 2 * SIZE]	STF	y4, [Y1 + 3 * SIZE]	STF	y5, [Y1 + 4 * SIZE]	STF	y6, [Y1 + 5 * SIZE]	STF	y7, [Y1 + 6 * SIZE]	STF	y8, [Y1 + 7 * SIZE]	LDF	[Y1 +  8 * SIZE], y1	LDF	[Y1 +  9 * SIZE], y2	LDF	[Y1 + 10 * SIZE], y3	LDF	[Y1 + 11 * SIZE], y4	LDF	[Y1 + 12 * SIZE], y5	deccc	I	LDF	[Y1 + 13 * SIZE], y6	add	A1, 8 * SIZE, A1	LDF	[Y1 + 14 * SIZE], y7	add	Y1, 8 * SIZE, Y1	bg,pn	%icc, .LL32	LDF	[Y1 +  7 * SIZE], y8.LL33:	FADD	y1,  t1, y1	FMUL	a5,  x1, t1	FADD	y2,  t2, y2	FMUL	a6,  x1, t2	FADD	y3,  t3, y3	FMUL	a7,  x1, t3	FADD	y4,  t4, y4	FMUL	a8,  x1, t4	STF	y1, [Y1 + 0 * SIZE]	FADD	y5,  t1, y5	STF	y2, [Y1 + 1 * SIZE]	FADD	y6,  t2, y6	STF	y3, [Y1 + 2 * SIZE]	FADD	y7,  t3, y7	STF	y4, [Y1 + 3 * SIZE]	FADD	y8,  t4, y8	STF	y5, [Y1 + 4 * SIZE]	STF	y6, [Y1 + 5 * SIZE]	STF	y7, [Y1 + 6 * SIZE]	add	A1, 8 * SIZE, A1	STF	y8, [Y1 + 7 * SIZE]	add	Y1, 8 * SIZE, Y1.LL36:	andcc	M, 4, I	ble,pn	%icc, .LL37	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[A1 + 1 * SIZE], a2	LDF	[A1 + 2 * SIZE], a3	LDF	[A1 + 3 * SIZE], a4	LDF	[Y1 + 0 * SIZE], y1	add	A1, 4 * SIZE, A1	LDF	[Y1 + 1 * SIZE], y2	LDF	[Y1 + 2 * SIZE], y3	LDF	[Y1 + 3 * SIZE], y4	FMUL	a1,  x1, t1	FMUL	a2,  x1, t2	FMUL	a3,  x1, t3	FMUL	a4,  x1, t4	FADD	y1,  t1, y1	FADD	y2,  t2, y2	FADD	y3,  t3, y3	FADD	y4,  t4, y4	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	STF	y3, [Y1 + 2 * SIZE]	STF	y4, [Y1 + 3 * SIZE]	add	Y1, 4 * SIZE, Y1.LL37:	andcc	M, 2, I	ble,pn	%icc, .LL38	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[Y1 + 0 * SIZE], y1	LDF	[A1 + 1 * SIZE], a5	LDF	[Y1 + 1 * SIZE], y2	add	A1, 2 * SIZE, A1	FMUL	a1, x1, t1	FADD	y1, t1, y1	FMUL	a5, x1, t1	FADD	y2, t1, y2	STF	y1, [Y1 + 0 * SIZE]	STF	y2, [Y1 + 1 * SIZE]	add	Y1, 2 * SIZE, Y1.LL38:	andcc	M, 1, I	ble,pn	%icc, .LL990	nop	LDF	[A1 + 0 * SIZE], a1	LDF	[Y1 + 0 * SIZE], y1	FMUL	a1, x1, t1	FADD	y1, t1, y1	STF	y1, [Y1].LL990:	cmp	INCY, SIZE	be	%icc, .LL999	mov	Y, Y1	sra	M, 3, I	cmp	I, 0	ble,pn	%icc, .LL995	nop.LL991:	LDF	[BUFFER +  0 * SIZE], a1	LDF	[Y], y1	add	Y, INCY, Y	LDF	[BUFFER +  1 * SIZE], a2	LDF	[Y], y2	add	Y, INCY, Y	LDF	[BUFFER +  2 * SIZE], a3	LDF	[Y], y3	add	Y, INCY, Y	LDF	[BUFFER +  3 * SIZE], a4	LDF	[Y], y4	add	Y, INCY, Y	LDF	[BUFFER +  4 * SIZE], a5	FADD	y1, a1, y1	LDF	[Y], y5	add	Y, INCY, Y	LDF	[BUFFER +  5 * SIZE], a6	FADD	y2, a2, y2	LDF	[Y], y6	add	Y, INCY, Y	LDF	[BUFFER +  6 * SIZE], a7	FADD	y3, a3, y3	LDF	[Y], y7	add	Y, INCY, Y	LDF	[BUFFER +  7 * SIZE], a8	FADD	y4, a4, y4	LDF	[Y], y8	add	Y, INCY, Y	STF	y1, [Y1]	FADD	y5, a5, y5	add	Y1, INCY, Y1	STF	y2, [Y1]	FADD	y6, a6, y6	add	Y1, INCY, Y1	STF	y3, [Y1]	FADD	y7, a7, y7	add	Y1, INCY, Y1	STF	y4, [Y1]	FADD	y8, a8, y8	add	Y1, INCY, Y1	STF	y5, [Y1]	add	Y1, INCY, Y1	STF	y6, [Y1]	add	Y1, INCY, Y1	STF	y7, [Y1]	add	Y1, INCY, Y1	STF	y8, [Y1]	add	Y1, INCY, Y1		deccc	I	bg,pn	%icc, .LL991	add	BUFFER, 8 * SIZE, BUFFER.LL995:	andcc	M, 7, I	ble,pn	%icc, .LL999	nop	andcc	M, 4, I	ble,pn	%icc, .LL996	nop	LDF	[BUFFER +  0 * SIZE], a1	LDF	[BUFFER +  1 * SIZE], a2	LDF	[BUFFER +  2 * SIZE], a3	LDF	[BUFFER +  3 * SIZE], a4	add	BUFFER, 4 * SIZE, BUFFER	LDF	[Y], y1	add	Y, INCY, Y	LDF	[Y], y2	add	Y, INCY, Y	LDF	[Y], y3	add	Y, INCY, Y	LDF	[Y], y4	add	Y, INCY, Y	FADD	y1, a1, y1	FADD	y2, a2, y2	FADD	y3, a3, y3	FADD	y4, a4, y4	STF	y1, [Y1]	add	Y1, INCY, Y1	STF	y2, [Y1]	add	Y1, INCY, Y1	STF	y3, [Y1]	add	Y1, INCY, Y1	STF	y4, [Y1]	add	Y1, INCY, Y1	.LL996:	andcc	M, 2, I	ble,pn	%icc, .LL997	nop	LDF	[BUFFER +  0 * SIZE], a1	LDF	[BUFFER +  1 * SIZE], a2	add	BUFFER, 2 * SIZE, BUFFER	LDF	[Y], y1	add	Y, INCY, Y	LDF	[Y], y2	add	Y, INCY, Y	FADD	y1, a1, y1	FADD	y2, a2, y2	STF	y1, [Y1]	add	Y1, INCY, Y1	STF	y2, [Y1]	add	Y1, INCY, Y1	.LL997:	andcc	M, 1, I	ble,pn	%icc, .LL999	nop	LDF	[BUFFER +  0 * SIZE], a1	LDF	[Y], y1	FADD	y1, a1, y1	STF	y1, [Y1].LL999:	return	%i7 + 8	clr	%o0	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -