⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zsymv_u_sse2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	/*
	 * Tail of the unrolled main-loop body.  The loop label and the first
	 * instructions of the body lie above this excerpt, so the first group
	 * below starts mid-way (xt1 was already prepared before this point).
	 *
	 * Syntax: AT&T operand order (source first, destination last), passed
	 * through the C preprocessor.
	 *
	 * NOTE(review): a1..a3, xt1, xtemp1..4, atemp1..4, xsum1/2, yy1/2, XX,
	 * YY, A1, A2, SIZE, MOVDDUP, ADD, PREFETCH, PREFETCHW and PREFETCHSIZE
	 * are macros/aliases defined elsewhere in the file - confirm their
	 * definitions there.  MOVDDUP(off, base, reg) presumably loads one
	 * element from off(base) and duplicates it into both lanes of reg; ADD
	 * presumably expands to addpd or subpd depending on build flags.
	 *
	 * Every group follows the same shape:
	 *	xt1    = xtempK		(optionally refill xtempK from XX)
	 *	xt1   *= aJ
	 *	aJ    *= atempK
	 *	xsumN  = xsumN (addpd | ADD) xt1
	 *	yyN   += aJ
	 *	aJ     = MOVDDUP(next element of A1/A2)
	 */

	/* finish the group begun above: a3 * atemp3 -> yy1, xt1 -> xsum2 */
	mulpd	atemp3, a3
	addpd	xt1, xsum2
	addpd	a3, yy1
	MOVDDUP(4 * SIZE, A1, a3)

	movapd	xtemp4, xt1
	movapd	14 * SIZE(XX), xtemp4		/* refill xtemp4 from XX */
	mulpd	a1, xt1
	mulpd	atemp2, a1
	ADD	xt1, xsum1
	addpd	a1, yy2
	MOVDDUP(6 * SIZE, A2, a1)

	/* write yy2 back to YY[2..3], then load YY[6..7] into yy2 */
	movlpd	yy2, 2 * SIZE(YY)
	movhpd	yy2, 3 * SIZE(YY)
	movsd	6 * SIZE(YY), yy2
	movhpd	7 * SIZE(YY), yy2

	movapd	xtemp2, xt1
	movapd	10 * SIZE(XX), xtemp2		/* refill xtemp2 from XX */
	mulpd	a2, xt1
	mulpd	atemp4, a2
	ADD	xt1, xsum2
	addpd	a2, yy1
	MOVDDUP(5 * SIZE, A1, a2)

	PREFETCH	PREFETCHSIZE(A2)

	/* write yy1 back to YY[0..1], then load YY[4..5] into yy1 */
	movlpd	yy1, 0 * SIZE(YY)
	movhpd	yy1, 1 * SIZE(YY)
	movsd	4 * SIZE(YY), yy1
	movhpd	5 * SIZE(YY), yy1

	movapd	xtemp1, xt1
	mulpd	a3, xt1
	mulpd	atemp1, a3
	addpd	xt1, xsum1
	addpd	a3, yy1
	MOVDDUP(7 * SIZE, A2, a3)

	movapd	xtemp3, xt1
	mulpd	a1, xt1
	mulpd	atemp3, a1
	addpd	xt1, xsum2
	addpd	a1, yy2
	MOVDDUP(6 * SIZE, A1, a1)

	movapd	xtemp2, xt1
	mulpd	a2, xt1
	mulpd	atemp2, a2
	ADD	xt1, xsum1
	addpd	a2, yy1
	MOVDDUP(4 * SIZE, A2, a2)

	PREFETCHW	PREFETCHSIZE(YY)

	movapd	xtemp4, xt1
	mulpd	a3, xt1
	mulpd	atemp4, a3
	ADD	xt1, xsum2
	addpd	a3, yy2
	MOVDDUP(7 * SIZE, A1, a3)

	movapd	xtemp3, xt1
	movapd	20 * SIZE(XX), xtemp3		/* refill xtemp3 from XX */
	mulpd	a1, xt1
	mulpd	atemp1, a1
	addpd	xt1, xsum1
	addpd	a1, yy2
	MOVDDUP(5 * SIZE, A2, a1)

	movapd	xtemp1, xt1
	movapd	16 * SIZE(XX), xtemp1		/* refill xtemp1 from XX */
	mulpd	a2, xt1
	mulpd	atemp3, a2
	addpd	xt1, xsum2
	addpd	a2, yy1
	MOVDDUP(10 * SIZE, A2, a2)

	movapd	xtemp4, xt1
	movapd	22 * SIZE(XX), xtemp4		/* refill xtemp4 from XX */
	mulpd	a3, xt1
	mulpd	atemp2, a3
	ADD	xt1, xsum1
	addpd	a3, yy2
	MOVDDUP( 9 * SIZE, A1, a3)

	/* write yy2 back to YY[6..7], then load YY[10..11] into yy2 */
	movlpd	yy2, 6 * SIZE(YY)
	movhpd	yy2, 7 * SIZE(YY)
	movsd	10 * SIZE(YY), yy2
	movhpd	11 * SIZE(YY), yy2

	movapd	xtemp2, xt1
	movapd	18 * SIZE(XX), xtemp2		/* refill xtemp2 from XX */
	mulpd	a1, xt1
	mulpd	atemp4, a1
	ADD	xt1, xsum2
	addpd	a1, yy1
	MOVDDUP( 8 * SIZE, A1, a1)

	/* write yy1 back to YY[4..5], then load YY[8..9] into yy1 */
	movlpd	yy1, 4 * SIZE(YY)
	movhpd	yy1, 5 * SIZE(YY)
	movsd	8 * SIZE(YY), yy1
	movhpd	9 * SIZE(YY), yy1

/*
 * Remainder of the kernel: end of the .L12 loop, the (IS & 2) remainder
 * (.L15), the two-column block at .L18, the odd-M column (.L20/.L22/.L28),
 * the strided copy-back of the result (.L990-.L998) and the epilogue (.L999).
 *
 * Syntax: AT&T operand order (source first, destination last), passed
 * through the C preprocessor, so directives must start their own line.
 *
 * NOTE(review): all upper-case names and the lower-case register aliases
 * (a1, a2, xt1, xtempN, atempN, xsumN, yyN) are macros defined elsewhere in
 * the file; labels .L11 and .L12 are also above this excerpt.  MOVDDUP and
 * ADD are opaque here - presumably "load + duplicate" and "addpd or subpd
 * depending on build flags"; confirm against their definitions.
 */

	/* ---- end of the .L12 loop body: advance pointers, loop on I ---- */
	subq	$-16 * SIZE, XX			/* XX += 16 * SIZE */
	addq	$8 * SIZE, YY
	addq	$8 * SIZE, A1
	addq	$8 * SIZE, A2

	decq	I
	jg	.L12
	ALIGN_3

.L15:
	/*
	 * Skip to .L18 when bit 1 of IS is clear.  testq clears OF and the
	 * masked result is never negative, so jle behaves as jz here.
	 */
	testq	$2, IS
	jle	.L18

	/* same multiply/accumulate group shape as the main loop, without
	   refilling xtempN and without prefetches */
	movapd	xtemp1, xt1
	mulpd	a1, xt1
	mulpd	atemp1, a1
	addpd	xt1, xsum1
	addpd	a1, yy1
	MOVDDUP(1 * SIZE, A1, a1)

	movapd	xtemp3, xt1
	mulpd	a2, xt1
	mulpd	atemp3, a2
	addpd	xt1, xsum2
	addpd	a2, yy2
	MOVDDUP(3 * SIZE, A2, a2)

	movapd	xtemp2, xt1
	mulpd	a1, xt1
	mulpd	atemp2, a1
	ADD	xt1, xsum1
	addpd	a1, yy1
	MOVDDUP(2 * SIZE, A1, a1)

	movapd	xtemp4, xt1
	mulpd	a2, xt1
	mulpd	atemp4, a2
	ADD	xt1, xsum2
	addpd	a2, yy2
	MOVDDUP(0 * SIZE, A2, a2)

	movapd	xtemp3, xt1
	mulpd	a1, xt1
	mulpd	atemp1, a1
	addpd	xt1, xsum1
	addpd	a1, yy2
	MOVDDUP(3 * SIZE, A1, a1)

	movapd	xtemp1, xt1
	mulpd	a2, xt1
	mulpd	atemp3, a2
	addpd	xt1, xsum2
	addpd	a2, yy1
	MOVDDUP(1 * SIZE, A2, a2)

	movapd	xtemp4, xt1
	mulpd	a1, xt1
	mulpd	atemp2, a1
	ADD	xt1, xsum1
	addpd	a1, yy2

	/* write yy2 back to YY[2..3], then load YY[6..7] into yy2 */
	movlpd	yy2, 2 * SIZE(YY)
	movhpd	yy2, 3 * SIZE(YY)
	movsd	6 * SIZE(YY), yy2
	movhpd	7 * SIZE(YY), yy2

	movapd	xtemp2, xt1
	mulpd	a2, xt1
	mulpd	atemp4, a2
	ADD	xt1, xsum2
	addpd	a2, yy1

	/* write yy1 back to YY[0..1], then load YY[4..5] into yy1 */
	movlpd	yy1, 0 * SIZE(YY)
	movhpd	yy1, 1 * SIZE(YY)
	movsd	4 * SIZE(YY), yy1
	movhpd	5 * SIZE(YY), yy1

	addq	$4 * SIZE, YY
	addq	$4 * SIZE, A1
	addq	$4 * SIZE, A2
	ALIGN_3

.L18:
	/*
	 * Final contributions to xsum1/xsum2 for the current column pair,
	 * using elements 0..1 of A1 and 0..3 of A2.
	 * NOTE(review): presumably the diagonal block of the matrix; with
	 * HEMV defined some terms are skipped and one is subtracted - confirm
	 * against the routine's header.
	 */
	MOVDDUP(0 * SIZE, A1, a1)
	MOVDDUP(0 * SIZE, A2, a2)

	mulpd	atemp1, a1
	mulpd	atemp1, a2
	addpd	a1, xsum1
	addpd	a2, xsum2

#ifndef HEMV
	MOVDDUP(1 * SIZE, A1, a1)
	MOVDDUP(1 * SIZE, A2, a2)

	mulpd	atemp2, a1
	mulpd	atemp2, a2
	addpd	a1, xsum1
	addpd	a2, xsum2
#else
	MOVDDUP(1 * SIZE, A2, a2)

	mulpd	atemp2, a2
	subpd	a2, xsum2
#endif

	MOVDDUP(0 * SIZE, A2, a1)
	MOVDDUP(2 * SIZE, A2, a2)

	mulpd	atemp3, a1
	mulpd	atemp3, a2
	addpd	a1, xsum1
	addpd	a2, xsum2

#ifndef HEMV
	MOVDDUP(1 * SIZE, A2, a1)
	MOVDDUP(3 * SIZE, A2, a2)

	mulpd	atemp4, a1
	mulpd	atemp4, a2
	addpd	a1, xsum1
	addpd	a2, xsum2
#else
	MOVDDUP(1 * SIZE, A2, a1)

	mulpd	atemp4, a1
	addpd	a1, xsum1
#endif

	/* fold the accumulated sums into yy1/yy2 and store to YY[0..3] */
	addpd	xsum1, yy1
	addpd	xsum2, yy2

	movlpd	yy1, 0 * SIZE(YY)
	movhpd	yy1, 1 * SIZE(YY)
	movlpd	yy2, 2 * SIZE(YY)
	movhpd	yy2, 3 * SIZE(YY)

	/* IS += 2; go round again (label .L11, above this excerpt) while
	   IS + 2 <= M (signed compare) */
	addq	$2, IS

	movq	IS, I
	addq	$2, I
	cmpq	M, I
	jle	.L11
	ALIGN_3

.L20:
	/* Single remaining column: skipped when the low bit of M is clear
	   (jle after testq behaves as jz, see .L15). */
	testq	$1, M
	jle	.L990

	movq	A, A1
	leaq	(, IS, 4), I			/* I = IS * 4 */

	movapd	0 * SIZE(NEW_X, I, SIZE), atemp1
	movapd	2 * SIZE(NEW_X, I, SIZE), atemp2

	pxor	xsum1, xsum1			/* clear both accumulators */
	pxor	xsum2, xsum2

	MOVDDUP(0 * SIZE, A1, a1)
	MOVDDUP(1 * SIZE, A1, a2)

	/* preload the first eight elements of NEW_X and four of NEW_Y */
	movapd	0 * SIZE(NEW_X), xtemp1
	movapd	2 * SIZE(NEW_X), xtemp2
	movapd	4 * SIZE(NEW_X), xtemp3
	movapd	6 * SIZE(NEW_X), xtemp4

	movsd	0 * SIZE(NEW_Y), yy1
	movhpd	1 * SIZE(NEW_Y), yy1
	movsd	2 * SIZE(NEW_Y), yy2
	movhpd	3 * SIZE(NEW_Y), yy2

	movq	NEW_X, XX
	movq	NEW_Y, YY

	/* trip count I = IS >> 1; skip the loop when that is <= 0 */
	movq	IS, I
	sarq	$1, I
	jle	.L28
	ALIGN_3

.L22:
	/* one-column loop body: four groups per iteration, each refilling
	   its xtempN from XX */
	movapd	xtemp1, xt1
	movapd	8 * SIZE(XX), xtemp1
	mulpd	a1, xt1
	mulpd	atemp1, a1
	addpd	xt1, xsum1
	addpd	a1, yy1
	MOVDDUP(2 * SIZE, A1, a1)

	movapd	xtemp2, xt1
	movapd	10 * SIZE(XX), xtemp2
	mulpd	a2, xt1
	mulpd	atemp2, a2
	ADD	xt1, xsum2
	addpd	a2, yy1
	MOVDDUP(3 * SIZE, A1, a2)

	/* write yy1 back to YY[0..1], then load YY[4..5] into yy1 */
	movlpd	yy1, 0 * SIZE(YY)
	movhpd	yy1, 1 * SIZE(YY)
	movsd	4 * SIZE(YY), yy1
	movhpd	5 * SIZE(YY), yy1

	movapd	xtemp3, xt1
	movapd	12 * SIZE(XX), xtemp3
	mulpd	a1, xt1
	mulpd	atemp1, a1
	addpd	xt1, xsum1
	addpd	a1, yy2
	MOVDDUP(4 * SIZE, A1, a1)

	movapd	xtemp4, xt1
	movapd	14 * SIZE(XX), xtemp4
	mulpd	a2, xt1
	mulpd	atemp2, a2
	ADD	xt1, xsum2
	addpd	a2, yy2
	MOVDDUP(5 * SIZE, A1, a2)

	/* write yy2 back to YY[2..3], then load YY[6..7] into yy2 */
	movlpd	yy2, 2 * SIZE(YY)
	movhpd	yy2, 3 * SIZE(YY)
	movsd	6 * SIZE(YY), yy2
	movhpd	7 * SIZE(YY), yy2

	addq	$8 * SIZE, XX
	addq	$4 * SIZE, YY
	addq	$4 * SIZE, A1

	decq	I
	jg	.L22
	ALIGN_3

.L28:
	/* last element(s) of the single column; with HEMV defined only
	   element 0 of A1 contributes */
	MOVDDUP(0 * SIZE, A1, a1)

#ifndef HEMV
	MOVDDUP(1 * SIZE, A1, a2)

	mulpd	atemp1, a1
	mulpd	atemp2, a2
	addpd	a1, xsum1
	addpd	a2, xsum2
#else
	mulpd	atemp1, a1
	addpd	a1, xsum1
#endif

	/* combine both accumulators into yy1 and store to YY[0..1] */
	addpd	xsum2, xsum1
	addpd	xsum1, yy1

	movlpd	yy1, 0 * SIZE(YY)
	movhpd	yy1, 1 * SIZE(YY)
	ALIGN_3

.L990:
	/*
	 * Copy the result from NEW_Y to Y unless INCY == 2 * SIZE.
	 * NOTE(review): presumably INCY is a byte stride and NEW_Y already
	 * aliases Y in the contiguous case - confirm in the prologue.
	 */
	cmpq	$2 * SIZE, INCY
	je	.L999

	movq	M, %rax
	sarq	$2, %rax			/* M / 4 unrolled iterations */
	jle	.L997
	ALIGN_3

.L996:
	/* four two-element entries per iteration, Y advanced by INCY each */
	movapd	0 * SIZE(NEW_Y), %xmm0
	movapd	2 * SIZE(NEW_Y), %xmm1
	movapd	4 * SIZE(NEW_Y), %xmm2
	movapd	6 * SIZE(NEW_Y), %xmm3

	movsd	%xmm0, 0 * SIZE(Y)
	movhpd	%xmm0, 1 * SIZE(Y)
	addq	INCY, Y

	movsd	%xmm1, 0 * SIZE(Y)
	movhpd	%xmm1, 1 * SIZE(Y)
	addq	INCY, Y

	movsd	%xmm2, 0 * SIZE(Y)
	movhpd	%xmm2, 1 * SIZE(Y)
	addq	INCY, Y

	movsd	%xmm3, 0 * SIZE(Y)
	movhpd	%xmm3, 1 * SIZE(Y)
	addq	INCY, Y

	addq	$8 * SIZE, NEW_Y
	decq	%rax
	jg	.L996
	ALIGN_3

.L997:
	/* remaining M & 3 entries (andq clears OF, so jle acts as jz) */
	movq	M, %rax
	andq	$3, %rax
	jle	.L999
	ALIGN_3

.L998:
	movapd	0 * SIZE(NEW_Y), %xmm0

	movsd	%xmm0, 0 * SIZE(Y)
	movhpd	%xmm0, 1 * SIZE(Y)
	addq	INCY, Y

	addq	$2 * SIZE, NEW_Y

	decq	%rax
	jg	.L998
	ALIGN_3

.L999:
	/* epilogue: reload the callee-saved registers from the stack frame
	   (the matching saves are in the prologue, above this excerpt) */
	movq	0(%rsp), %rbx
	movq	8(%rsp), %rbp
	movq	16(%rsp), %r12
	movq	24(%rsp), %r13
	movq	32(%rsp), %r14
	movq	40(%rsp), %r15

#ifdef WINDOWS_ABI
	/* Microsoft x64 additionally treats rdi, rsi and xmm6-xmm15 as
	   callee-saved */
	movq	48(%rsp), %rdi
	movq	56(%rsp), %rsi
	movups	64(%rsp), %xmm6
	movups	80(%rsp), %xmm7
	movups	96(%rsp), %xmm8
	movups	112(%rsp), %xmm9
	movups	128(%rsp), %xmm10
	movups	144(%rsp), %xmm11
	movups	160(%rsp), %xmm12
	movups	176(%rsp), %xmm13
	movups	192(%rsp), %xmm14
	movups	208(%rsp), %xmm15
#endif

	addq	$STACKSIZE, %rsp		/* release the stack frame */
	ret

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -