⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zsymv_l_sse2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	movapd	14 * SIZE(XX), xtemp4	addq	  $8 * SIZE, XX	addq	  $4 * SIZE, A1	addq	  $4 * SIZE, A2	movq	 M, I	subq	IS, I	subq	$2, I	sarq	$2, I	jle	.L15	ALIGN_3.L12:	movapd	  xtemp1, xt1	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(1 * SIZE, A1, a1)	PREFETCH	PREFETCHSIZE(A1)	movapd	  xtemp3, xt1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(3 * SIZE, A2, a2)	movapd	  xtemp2, xt1	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(2 * SIZE, A1, a1)	movapd	  xtemp4, xt1	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(0 * SIZE, A2, a2)	PREFETCH	PREFETCHSIZE(XX)	movapd	  xtemp3, xt1	movapd	12 * SIZE(XX), xtemp3	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP(3 * SIZE, A1, a1)	movapd	  xtemp1, xt1	movapd	 8 * SIZE(XX), xtemp1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy1	MOVDDUP(1 * SIZE, A2, a2)	movapd	  xtemp4, xt1	movapd	14 * SIZE(XX), xtemp4	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP(4 * SIZE, A1, a1)	movlpd	 yy2, 2 * SIZE(YY)	movhpd	 yy2, 3 * SIZE(YY)	movsd	 6 * SIZE(YY), yy2	movhpd	 7 * SIZE(YY), yy2	movapd	  xtemp2, xt1	movapd	10 * SIZE(XX), xtemp2	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy1	MOVDDUP(6 * SIZE, A2, a2)	PREFETCH	PREFETCHSIZE(A2)	movlpd	 yy1, 0 * SIZE(YY)	movhpd	 yy1, 1 * SIZE(YY)	movsd	 4 * SIZE(YY), yy1	movhpd	 5 * SIZE(YY), yy1	movapd	  xtemp1, xt1	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(5 * SIZE, A1, a1)	movapd	  xtemp3, xt1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(7 * SIZE, A2, a2)	movapd	  xtemp2, xt1	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(6 * SIZE, A1, a1)	PREFETCHW	PREFETCHSIZE(YY)	movapd	  xtemp4, xt1	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(4 * SIZE, A2, a2)	movapd	  xtemp3, xt1	movapd	20 * SIZE(XX), xtemp3	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP(7 * SIZE, A1, a1)	movapd	  xtemp1, xt1	movapd	16 * SIZE(XX), xtemp1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy1	MOVDDUP(5 * SIZE, A2, a2)	movapd	  xtemp4, xt1	movapd	22 * SIZE(XX), xtemp4	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP( 8 * SIZE, A1, a1)	movlpd	 yy2, 6 * SIZE(YY)	movhpd	 yy2, 7 * SIZE(YY)	movsd	10 * SIZE(YY), yy2	movhpd	11 * SIZE(YY), yy2	movapd	  xtemp2, xt1	movapd	18 * SIZE(XX), xtemp2	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy1	MOVDDUP(10 * SIZE, A2, a2)	movlpd	 yy1, 4 * SIZE(YY)	movhpd	 yy1, 5 * SIZE(YY)	movsd	 8 * SIZE(YY), yy1	movhpd	 9 * SIZE(YY), yy1	subq	 $-16 * SIZE, XX	addq	 $  8 * SIZE, YY	addq	 $  8 * SIZE, A1	addq	 $  8 * SIZE, A2	decq	 I	jg	 .L12	ALIGN_3.L15:	movq	 M, I	subq	IS, I	subq	$2, I	testq	$2, I	jle	.L16	movapd	  xtemp1, xt1	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(1 * SIZE, A1, a1)	movapd	  xtemp3, xt1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(3 * SIZE, A2, a2)	movapd	  xtemp2, xt1	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(2 * SIZE, A1, a1)	movapd	  xtemp4, xt1	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy2	MOVDDUP(0 * SIZE, A2, a2)	movapd	  xtemp3, xt1	movapd	12 * SIZE(XX), xtemp3	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP(3 * SIZE, A1, a1)	movapd	  xtemp1, xt1	movapd	 8 * SIZE(XX), xtemp1	mulpd	  a2,     xt1	mulpd	  atemp3, a2	addpd	  xt1,    xsum2	addpd	  a2,     yy1	MOVDDUP(1 * SIZE, A2, a2)	movapd	  xtemp4, xt1	movapd	14 * SIZE(XX), xtemp4	mulpd	  a1,     xt1	mulpd	  atemp2, a1	ADD	  xt1,    xsum1	addpd	  a1,     yy2	MOVDDUP(4 * SIZE, A1, a1)	movlpd	 yy2, 2 * SIZE(YY)	movhpd	 yy2, 3 * SIZE(YY)	movsd	 6 * SIZE(YY), yy2	movhpd	 7 * SIZE(YY), yy2	movapd	  xtemp2, xt1	movapd	10 * SIZE(XX), xtemp2	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy1	movlpd	 yy1, 0 * SIZE(YY)	movhpd	 yy1, 1 * SIZE(YY)	movsd	 4 * SIZE(YY), yy1	movhpd	 5 * SIZE(YY), yy1	addq	 $4 * SIZE, YY	addq	 $4 * SIZE, A1	addq	 $4 * SIZE, A2	ALIGN_3.L16:	testq	$1, M	jle	.L18	MOVDDUP(1 * SIZE, A1, a2)	movapd	  xtemp1, xt1	mulpd	  a1,     xt1	mulpd	  atemp1, a1	addpd	  xt1,    xsum1	addpd	  a1,     yy1	MOVDDUP(0 * SIZE, A2, a1)	movapd	  xtemp2, xt1	mulpd	  a2,     xt1	mulpd	  atemp2, a2	ADD	  xt1,    xsum1	addpd	  a2,     yy1	MOVDDUP(1 * SIZE, A2, a2)	movapd	  xtemp1, xt1	mulpd	  a1,     xt1	mulpd	  atemp3, a1	addpd	  xt1,    xsum2	addpd	  a1,     yy1	movapd	  xtemp2, xt1	mulpd	  a2,     xt1	mulpd	  atemp4, a2	ADD	  xt1,    xsum2	addpd	  a2,     yy1	movlpd	 yy1, 0 * SIZE(YY)	movhpd	 yy1, 1 * SIZE(YY)	ALIGN_3.L18:	leaq	(, IS, SIZE), I	movsd	 0 * SIZE(NEW_Y, I, 2), yy1	movhpd	 1 * SIZE(NEW_Y, I, 2), yy1	movsd	 2 * SIZE(NEW_Y, I, 2), yy2	movhpd	 3 * SIZE(NEW_Y, I, 2), yy2	addpd	 xsum1, yy1	addpd	 xsum2, yy2	movlpd	 yy1, 0 * SIZE(NEW_Y, I, 2)	movhpd	 yy1, 1 * SIZE(NEW_Y, I, 2)	movlpd	 yy2, 2 * SIZE(NEW_Y, I, 2)	movhpd	 yy2, 3 * SIZE(NEW_Y, I, 2)	addq	 $2, IS	movq	 IS, I	addq	 $2, I	cmpq	 M, I	jle	 .L11	ALIGN_3.L20:	testq	$1, M	jle	.L990	leaq	(, IS, SIZE), I	movapd	 0 * SIZE(NEW_X, I, 4), atemp1	movapd	 2 * SIZE(NEW_X, I, 4), atemp2	movsd	 0 * SIZE(NEW_Y, I, 2), yy1	movhpd	 1 * SIZE(NEW_Y, I, 2), yy1#ifndef HEMV	MOVDDUP(0 * SIZE, A, a1)	MOVDDUP(1 * SIZE, A, a2)	mulpd	  atemp1, a1	mulpd	  atemp2, a2	addpd	  a1,     yy1	addpd	  a2,     yy1#else	MOVDDUP(0 * SIZE, A, a1)	mulpd	  atemp1, a1	addpd	  a1,     yy1#endif	movlpd	 yy1, 0 * SIZE(NEW_Y, I, 2)	movhpd	 yy1, 1 * SIZE(NEW_Y, I, 2)	ALIGN_3.L990:	cmpq   $2 * SIZE, INCY	je    .L999	movq	M,  %rax	sarq	$2, %rax	jle	.L997	ALIGN_3.L996:	movapd	 0 * SIZE(NEW_Y), %xmm0	movapd	 2 * SIZE(NEW_Y), %xmm1	movapd	 4 * SIZE(NEW_Y), %xmm2	movapd	 6 * SIZE(NEW_Y), %xmm3	movsd	%xmm0,  0 * SIZE(Y)	movhpd	%xmm0,  1 * SIZE(Y)	addq	INCY, Y	movsd	%xmm1,  0 * SIZE(Y)	movhpd	%xmm1,  1 * SIZE(Y)	addq	INCY, Y	movsd	%xmm2,  0 * SIZE(Y)	movhpd	%xmm2,  1 * SIZE(Y)	addq	INCY, Y	movsd	%xmm3,  0 * SIZE(Y)	movhpd	%xmm3,  1 * SIZE(Y)	addq	INCY, Y	addq	$8 * SIZE, NEW_Y	decq	%rax	jg	.L996	ALIGN_3.L997:	movq	M, %rax	andq	$3, %rax	jle	.L999	ALIGN_3.L998:	movapd	0 * SIZE(NEW_Y), %xmm0	movsd	%xmm0,  0 * SIZE(Y)	movhpd	%xmm0,  1 * SIZE(Y)	addq	INCY, Y	addq	$2 * SIZE, NEW_Y	decq	%rax	jg	.L998	ALIGN_3.L999:	movq	  0(%rsp), %rbx	movq	  8(%rsp), %rbp	movq	 16(%rsp), %r12	movq	 24(%rsp), %r13	movq	 32(%rsp), %r14	movq	 40(%rsp), %r15#ifdef WINDOWS_ABI	movq	 48(%rsp), %rdi	movq	 56(%rsp), %rsi	movups	 64(%rsp), %xmm6	movups	 80(%rsp), %xmm7	movups	 96(%rsp), %xmm8	movups	112(%rsp), %xmm9	movups	128(%rsp), %xmm10	movups	144(%rsp), %xmm11	movups	160(%rsp), %xmm12	movups	176(%rsp), %xmm13	movups	192(%rsp), %xmm14	movups	208(%rsp), %xmm15#endif	addq	$STACKSIZE, %rsp	ret	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -