⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 symv_l_sse2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	movhpd	 yy2, 3 * SIZE(YY)	movsd	 6 * SIZE(YY), yy2	movhpd	 7 * SIZE(YY), yy2	movapd	 xtemp1, xt1	mulpd	 a3,     xt1	mulpd	 atemp1, a3	addpd	 xt1,    xsum1	addpd	 a3,     yy1	movsd	 6 * SIZE(A1, LDA, 1), a3	movhpd	 7 * SIZE(A1, LDA, 1), a3	PREFETCH	PREFETCHSIZE(A2)	movapd	 xtemp2, xt1	mulpd	 a1,     xt1	mulpd	 atemp1, a1	addpd	 xt1,    xsum1	addpd	 a1,     yy2	movsd	 4 * SIZE(A2), a1	movhpd	 5 * SIZE(A2), a1	movapd	 xtemp1, xt1	mulpd	 a2,     xt1	mulpd	 atemp2, a2	addpd	 xt1,    xsum2	addpd	 a2,     yy1	movsd	 6 * SIZE(A2), a2	movhpd	 7 * SIZE(A2), a2#if !defined(CORE2) && !defined(PENRYN)	PREFETCHW	PREFETCHSIZE(YY)#endif	movapd	 xtemp2, xt1	mulpd	 a3,     xt1	mulpd	 atemp2, a3	addpd	 xt1,    xsum2	addpd	 a3,     yy2	movsd	 4 * SIZE(A2, LDA, 1), a3	movhpd	 5 * SIZE(A2, LDA, 1), a3	movapd	 xtemp1, xt1	mulpd	 a1,     xt1	mulpd	 atemp3, a1	addpd	 xt1,    xsum3	addpd	 a1,     yy1	movsd	 6 * SIZE(A2, LDA, 1), a1	movhpd	 7 * SIZE(A2, LDA, 1), a1	PREFETCH	PREFETCHSIZE(A2, LDA, 1)	movapd	 xtemp2, xt1	mulpd	 a2,     xt1	mulpd	 atemp3, a2	addpd	 xt1,    xsum3	addpd	 a2,     yy2	movsd	10 * SIZE(A1), a2	movhpd	11 * SIZE(A1), a2	movapd	 xtemp1, xt1	movapd	 8 * SIZE(XX), xtemp1	mulpd	 a3,     xt1	mulpd	 atemp4, a3	addpd	 xt1,    xsum4	addpd	 a3,     yy1	movsd	 8 * SIZE(A1, LDA, 1), a3	movhpd	 9 * SIZE(A1, LDA, 1), a3	movapd	 xtemp2, xt1	movapd	10 * SIZE(XX), xtemp2	mulpd	 a1,     xt1	mulpd	 atemp4, a1	addpd	 xt1,    xsum4	addpd	 a1,     yy2	movsd	 8 * SIZE(A1), a1	movhpd	 9 * SIZE(A1), a1	movsd	 yy1, 4 * SIZE(YY)	movhpd	 yy1, 5 * SIZE(YY)	movsd	 8 * SIZE(YY), yy1	movhpd	 9 * SIZE(YY), yy1	movsd	 yy2, 6 * SIZE(YY)	movhpd	 yy2, 7 * SIZE(YY)	movsd	10 * SIZE(YY), yy2	movhpd	11 * SIZE(YY), yy2	addq	 $8 * SIZE, XX	addq	 $8 * SIZE, YY	addq	 $8 * SIZE, A1	addq	 $8 * SIZE, A2	decq	 I	jg	 .L12	ALIGN_3.L15:	movq	M,  I	subq	IS, I	subq	$4, I	test	$4, I	jle	.L17	movapd	 xtemp1, xt1	mulpd	 a1,     xt1	mulpd	 atemp1, a1	addpd	 xt1,    xsum1	addpd	 a1,     yy1	movsd	 2 * SIZE(A1, LDA, 1), a1	movhpd	 3 * SIZE(A1, LDA, 1), a1	movapd	 xtemp2, xt1	mulpd	 a2,     xt1	mulpd	 atemp1, a2	addpd	 xt1,    xsum1	addpd	 a2,     yy2	movsd	 0 * SIZE(A2), a2	movhpd	 1 * SIZE(A2), a2	movapd	 xtemp1, xt1	mulpd	 a3,     xt1	mulpd	 atemp2, a3	addpd	 xt1,    xsum2	addpd	 a3,     yy1	movsd	 2 * SIZE(A2), a3	movhpd	 3 * SIZE(A2), a3	movapd	 xtemp2, xt1	mulpd	 a1,     xt1	mulpd	 atemp2, a1	addpd	 xt1,    xsum2	addpd	 a1,     yy2	movsd	 0 * SIZE(A2, LDA, 1), a1	movhpd	 1 * SIZE(A2, LDA, 1), a1	movapd	 xtemp1, xt1	mulpd	 a2,     xt1	mulpd	 atemp3, a2	addpd	 xt1,    xsum3	addpd	 a2,     yy1	movsd	 2 * SIZE(A2, LDA, 1), a2	movhpd	 3 * SIZE(A2, LDA, 1), a2	movapd	 xtemp2, xt1	mulpd	 a3,     xt1	mulpd	 atemp3, a3	addpd	 xt1,    xsum3	addpd	 a3,     yy2	movsd	 4 * SIZE(A1, LDA, 1), a3	movhpd	 5 * SIZE(A1, LDA, 1), a3	movapd	 xtemp1, xt1	movapd	 4 * SIZE(XX), xtemp1	mulpd	 a1,     xt1	mulpd	 atemp4, a1	addpd	 xt1,    xsum4	addpd	 a1,     yy1	movsd	 4 * SIZE(A1), a1	movhpd	 5 * SIZE(A1), a1	movapd	 xtemp2, xt1	movapd	 6 * SIZE(XX), xtemp2	mulpd	 a2,     xt1	mulpd	 atemp4, a2	addpd	 xt1,    xsum4	addpd	 a2,     yy2	movsd	 6 * SIZE(A1), a2	movhpd	 7 * SIZE(A1), a2	movsd	 yy1, 0 * SIZE(YY)	movhpd	 yy1, 1 * SIZE(YY)	movsd	 4 * SIZE(YY), yy1	movhpd	 5 * SIZE(YY), yy1	movsd	 yy2, 2 * SIZE(YY)	movhpd	 yy2, 3 * SIZE(YY)	movsd	 6 * SIZE(YY), yy2	movhpd	 7 * SIZE(YY), yy2	addq	 $4 * SIZE, XX	addq	 $4 * SIZE, YY	addq	 $4 * SIZE, A1	addq	 $4 * SIZE, A2	ALIGN_3.L17:	testq	$2, M	jle	.L18	movapd	 xtemp1, xt1	mulpd	 a1,     xt1	mulpd	 atemp1, a1	addpd	 xt1,    xsum1	addpd	 a1,     yy1	movsd	 0 * SIZE(A1, LDA, 1), a1	movhpd	 1 * SIZE(A1, LDA, 1), a1	movapd	 xtemp1, xt1	mulpd	 a1,     xt1	mulpd	 atemp2, a1	addpd	 xt1,    xsum2	addpd	 a1,     yy1	movsd	 0 * SIZE(A2), a1	movhpd	 1 * SIZE(A2), a1	movapd	 xtemp1, xt1	mulpd	 a1,     xt1	mulpd	 atemp3, a1	addpd	 xt1,    xsum3	addpd	 a1,     yy1	movsd	 0 * SIZE(A2, LDA, 1), a1	movhpd	 1 * SIZE(A2, LDA, 1), a1	movapd	 xtemp1, xt1	movapd	 2 * SIZE(XX), xtemp1	mulpd	 a1,     xt1	mulpd	 atemp4, a1	addpd	 xt1,    xsum4	addpd	 a1,     yy1	movsd	 2 * SIZE(A1), a1	movsd	 yy1, 0 * SIZE(YY)	movhpd	 yy1, 1 * SIZE(YY)	movsd	 2 * SIZE(YY), yy1	addq	 $2 * SIZE, XX	addq	 $2 * SIZE, YY	addq	 $2 * SIZE, A1	addq	 $2 * SIZE, A2	ALIGN_3.L18:	testq	$1, M	jle	.L19	movapd	 xtemp1, xt1	mulsd	 a1,     xt1	mulsd	 atemp1, a1	addsd	 xt1,    xsum1	addpd	 a1,     yy1	movsd	 0 * SIZE(A1, LDA, 1), a1	movapd	 xtemp1, xt1	mulsd	 a1,     xt1	mulsd	 atemp2, a1	addsd	 xt1,    xsum2	addsd	 a1,     yy1	movsd	 0 * SIZE(A2), a1	movapd	 xtemp1, xt1	mulsd	 a1,     xt1	mulsd	 atemp3, a1	addsd	 xt1,    xsum3	addsd	 a1,     yy1	movsd	 0 * SIZE(A2, LDA, 1), a1	movapd	 xtemp1, xt1	mulsd	 a1,     xt1	mulsd	 atemp4, a1	addsd	 xt1,    xsum4	addsd	 a1,     yy1	movsd	 yy1, 0 * SIZE(YY)	ALIGN_3.L19:#ifndef HAVE_SSE3	movapd	xsum1, atemp1	movapd	xsum3, atemp3	unpcklpd xsum2, xsum1	unpcklpd xsum4, xsum3	unpckhpd xsum2, atemp1	unpckhpd xsum4, atemp3	addpd	 atemp1, xsum1	addpd	 atemp3, xsum3#else	haddpd	 xsum2, xsum1	haddpd	 xsum4, xsum3#endif	movsd	 0 * SIZE(NEW_Y, IS, SIZE), yy1	movhpd	 1 * SIZE(NEW_Y, IS, SIZE), yy1	movsd	 2 * SIZE(NEW_Y, IS, SIZE), yy2	movhpd	 3 * SIZE(NEW_Y, IS, SIZE), yy2	addpd	 xsum1, yy1	addpd	 xsum3, yy2	movsd	 yy1, 0 * SIZE(NEW_Y, IS, SIZE)	movhpd	 yy1, 1 * SIZE(NEW_Y, IS, SIZE)	movsd	 yy2, 2 * SIZE(NEW_Y, IS, SIZE)	movhpd	 yy2, 3 * SIZE(NEW_Y, IS, SIZE)	addq	 $4, IS	movq	 IS, I	addq	 $4, I	cmpq	 M, I	jle	 .L11	ALIGN_3.L20:	testq	$2, M	jle	.L30	movq	A,  A1	leaq	2 * SIZE(A, LDA, 2), A	movapd		0 * SIZE(NEW_X, IS, SIZE), atemp2	movsd	 0 * SIZE(A1), xsum1	movhpd	 1 * SIZE(A1), xsum1	mulpd	 atemp2, xsum1	movsd	 1 * SIZE(A1), xsum2	movhpd	 1 * SIZE(A1, LDA, 1), xsum2	mulpd	 atemp2, xsum2#ifndef HAVE_SSE3	movapd	 atemp2, atemp1	unpcklpd atemp1, atemp1#else	movddup	 atemp2, atemp1#endif	unpckhpd atemp2, atemp2	testq	$1, M	jle	.L29	movsd	 2 * SIZE(A1), a1	movsd	 2 * SIZE(A1, LDA, 1), a2	movsd	 2 * SIZE(NEW_X, IS, SIZE), xtemp1	movsd	 2 * SIZE(NEW_Y, IS, SIZE), yy1	movapd	 xtemp1, xt1	mulsd	 a1,     xt1	mulsd	 atemp1, a1	addsd	 xt1,    xsum1	addpd	 a1,     yy1	movapd	 xtemp1, xt1	mulsd	 a2,     xt1	mulsd	 atemp2, a2	addsd	 xt1,    xsum2	addsd	 a2,     yy1	movsd	 yy1, 2 * SIZE(NEW_Y, IS, SIZE)	ALIGN_3.L29:#ifndef HAVE_SSE3	movapd	xsum1, atemp1	unpcklpd xsum2, xsum1	unpckhpd xsum2, atemp1	addpd	 atemp1, xsum1#else	haddpd	 xsum2, xsum1#endif	movsd	 0 * SIZE(NEW_Y, IS, SIZE), yy1	movhpd	 1 * SIZE(NEW_Y, IS, SIZE), yy1	addpd	 xsum1, yy1	movsd	 yy1, 0 * SIZE(NEW_Y, IS, SIZE)	movhpd	 yy1, 1 * SIZE(NEW_Y, IS, SIZE)	addq	 $2, IS	ALIGN_3.L30:	testq	$1, M	jle	.L990	movsd	 0 * SIZE(A), xsum1	movsd	 0 * SIZE(NEW_X, IS, SIZE), atemp1	movsd	 0 * SIZE(NEW_Y, IS, SIZE), yy1	mulsd	 atemp1, xsum1	addsd	 xsum1, yy1	movsd	 yy1, 0 * SIZE(NEW_Y, IS, SIZE)	ALIGN_3.L990:	cmpq   $SIZE, INCY	je    .L999	movq	M,  %rax	sarq	$3, %rax	jle	.L997	ALIGN_3.L996:	movapd	 0 * SIZE(NEW_Y), %xmm0	movapd	 2 * SIZE(NEW_Y), %xmm1	movapd	 4 * SIZE(NEW_Y), %xmm2	movapd	 6 * SIZE(NEW_Y), %xmm3	movsd	%xmm0,  0 * SIZE(Y)	addq	INCY, Y	movhpd	%xmm0,  0 * SIZE(Y)	addq	INCY, Y	movsd	%xmm1,  0 * SIZE(Y)	addq	INCY, Y	movhpd	%xmm1,  0 * SIZE(Y)	addq	INCY, Y	movsd	%xmm2,  0 * SIZE(Y)	addq	INCY, Y	movhpd	%xmm2,  0 * SIZE(Y)	addq	INCY, Y	movsd	%xmm3,  0 * SIZE(Y)	addq	INCY, Y	movhpd	%xmm3,  0 * SIZE(Y)	addq	INCY, Y	addq	$8 * SIZE, NEW_Y	decq	%rax	jg	.L996	ALIGN_3.L997:	movq	M, %rax	andq	$7, %rax	jle	.L999	ALIGN_3.L998:	movsd	0 * SIZE(NEW_Y), %xmm0	movsd	%xmm0,  0 * SIZE(Y)	addq	INCY, Y	addq	$1 * SIZE, NEW_Y	decq	%rax	jg	.L998	ALIGN_3.L999:	movq	  0(%rsp), %rbx	movq	  8(%rsp), %rbp	movq	 16(%rsp), %r12	movq	 24(%rsp), %r13	movq	 32(%rsp), %r14	movq	 40(%rsp), %r15#ifdef WINDOWS_ABI	movq	 48(%rsp), %rdi	movq	 56(%rsp), %rsi	movups	 64(%rsp), %xmm6	movups	 80(%rsp), %xmm7	movups	 96(%rsp), %xmm8	movups	112(%rsp), %xmm9	movups	128(%rsp), %xmm10	movups	144(%rsp), %xmm11	movups	160(%rsp), %xmm12	movups	176(%rsp), %xmm13	movups	192(%rsp), %xmm14	movups	208(%rsp), %xmm15#endif	addq	$STACKSIZE, %rsp	ret	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -