⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zaxpy_sse2_core2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	/*
	 * Tail of the complex-double AXPY kernel (AT&T syntax, SSE2, run through cpp).
	 *
	 * This chunk starts in the middle of the .L41 loop body; the routine
	 * prologue, the .L41 label and the setup of %xmm14 / %xmm15 are earlier
	 * in the file and are not visible here.
	 *
	 * Per element (two doubles at 0 * SIZE and 1 * SIZE) the visible code does:
	 *
	 *     y_pair += %xmm14 * MOVDDUP(x[0]) + %xmm15 * MOVDDUP(x[1])
	 *
	 * NOTE(review): MOVDDUP is a macro defined elsewhere; presumably it loads
	 * one double and duplicates it into both lanes -- confirm.
	 * NOTE(review): %xmm14 / %xmm15 presumably hold the two arrangements of
	 * the complex scalar alpha -- confirm against the setup code.
	 *
	 * Operand roles as used below:
	 *   X        read pointer into x, advanced by INCX after each element
	 *   Y        read pointer into y, advanced by INCY after each element
	 *   YY       write pointer into y, advanced by INCY after each store
	 *   M        element count; %rax is the working loop counter
	 *   INCX/Y   added directly to the pointers, i.e. used as byte strides
	 *
	 * Two code paths with the same arithmetic:
	 *   .L41 - .L47  access y with movapd, which requires 16-byte aligned
	 *                addresses (presumably guaranteed by a test before this
	 *                chunk -- confirm)
	 *   .L50 - .L57  access y with movsd + movhpd halves, no alignment needed
	 *
	 * Each path is software pipelined: a main loop handling 8 elements per
	 * iteration (M >> 3), a drain block for the last 8, then remainders
	 * selected by bits 4, 2 and 1 of M.
	 */

	/* ---- .L41 loop body, second half (label is before this chunk) ---- */
	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	/* Load the next four x elements. */
	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	/* Store the four finished y elements. */
	movapd	%xmm8, (YY)
	addq	INCY, YY
	movapd	%xmm9, (YY)
	addq	INCY, YY
	movapd	%xmm10, (YY)
	addq	INCY, YY
	movapd	%xmm11, (YY)
	addq	INCY, YY

	/* Fetch the next four y elements. */
	movapd	(Y), %xmm8
	addq	INCY, Y
	movapd	(Y), %xmm9
	addq	INCY, Y
	movapd	(Y), %xmm10
	addq	INCY, Y
	movapd	(Y), %xmm11
	addq	INCY, Y

	decq	%rax
	jg	.L41
	ALIGN_3

/* Aligned path: drain the pipeline, i.e. the last 8 elements of the M >> 3 part. */
.L42:
	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	movapd	%xmm8, (YY)
	addq	INCY, YY
	movapd	%xmm9, (YY)
	addq	INCY, YY
	movapd	%xmm10, (YY)
	addq	INCY, YY
	movapd	%xmm11, (YY)
	addq	INCY, YY

	movapd	(Y), %xmm8
	addq	INCY, Y
	movapd	(Y), %xmm9
	addq	INCY, Y
	movapd	(Y), %xmm10
	addq	INCY, Y
	movapd	(Y), %xmm11
	addq	INCY, Y

	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movapd	%xmm8, (YY)
	addq	INCY, YY
	movapd	%xmm9, (YY)
	addq	INCY, YY
	movapd	%xmm10, (YY)
	addq	INCY, YY
	movapd	%xmm11, (YY)
	addq	INCY, YY
	ALIGN_3

/* Aligned path: 4-element remainder (bit 2 of M). */
.L45:
	movq	M, %rax
	andq	$4, %rax
	jle	.L46			/* result is 0 or 4, so this is taken only on 0 */

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	movapd	(Y), %xmm8
	addq	INCY, Y
	movapd	(Y), %xmm9
	addq	INCY, Y
	movapd	(Y), %xmm10
	addq	INCY, Y
	movapd	(Y), %xmm11
	addq	INCY, Y

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movapd	%xmm8, (YY)
	addq	INCY, YY
	movapd	%xmm9, (YY)
	addq	INCY, YY
	movapd	%xmm10, (YY)
	addq	INCY, YY
	movapd	%xmm11, (YY)
	addq	INCY, YY
	ALIGN_3

/* Aligned path: 2-element remainder (bit 1 of M). */
.L46:
	movq	M, %rax
	andq	$2, %rax
	jle	.L47

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X

	movapd	(Y), %xmm8
	addq	INCY, Y
	movapd	(Y), %xmm9
	addq	INCY, Y

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm1
	mulpd	%xmm15, %xmm3

	addpd	%xmm0, %xmm8
	addpd	%xmm2, %xmm9
	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9

	movapd	%xmm8, (YY)
	addq	INCY, YY
	movapd	%xmm9, (YY)
	addq	INCY, YY
	ALIGN_3

/* Aligned path: final single element (bit 0 of M); pointers are not advanced. */
.L47:
	movq	M, %rax
	andq	$1, %rax
	jle	.L999

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)

	movapd	(Y), %xmm8

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	addpd	%xmm0, %xmm8
	addpd	%xmm1, %xmm8

	movapd	%xmm8, (YY)
	jmp	.L999
	ALIGN_3

/*
 * Unaligned-y path.  Reached only by a jump from code before this chunk
 * (the preceding statement is an unconditional jmp).  Same arithmetic as
 * above, but every y access is split into movsd (low half) + movhpd (high
 * half), neither of which requires 16-byte alignment.
 */
.L50:
	movq	Y, YY			/* write pointer starts at the read pointer */

	movq	M, %rax
	sarq	$3, %rax		/* number of 8-element groups */
	jle	.L55

	/* Pipeline prologue: preload four x and four y elements. */
	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm10
	movhpd	1 * SIZE(Y), %xmm10
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm11
	movhpd	1 * SIZE(Y), %xmm11
	addq	INCY, Y

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	decq	%rax
	jle	.L52			/* only one group: go straight to the drain block */
	ALIGN_3

/* Unaligned path main loop: 8 elements per iteration, as two 4-element halves. */
.L51:
	/* First half. */
	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm10, 0 * SIZE(YY)
	movhpd	%xmm10, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm11, 0 * SIZE(YY)
	movhpd	%xmm11, 1 * SIZE(YY)
	addq	INCY, YY

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm10
	movhpd	1 * SIZE(Y), %xmm10
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm11
	movhpd	1 * SIZE(Y), %xmm11
	addq	INCY, Y

	/* Second half. */
	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm10, 0 * SIZE(YY)
	movhpd	%xmm10, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm11, 0 * SIZE(YY)
	movhpd	%xmm11, 1 * SIZE(YY)
	addq	INCY, YY

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm10
	movhpd	1 * SIZE(Y), %xmm10
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm11
	movhpd	1 * SIZE(Y), %xmm11
	addq	INCY, Y

	decq	%rax
	jg	.L51
	ALIGN_3

/* Unaligned path: drain the pipeline, i.e. the last 8 elements of the M >> 3 part. */
.L52:
	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm10, 0 * SIZE(YY)
	movhpd	%xmm10, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm11, 0 * SIZE(YY)
	movhpd	%xmm11, 1 * SIZE(YY)
	addq	INCY, YY

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm10
	movhpd	1 * SIZE(Y), %xmm10
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm11
	movhpd	1 * SIZE(Y), %xmm11
	addq	INCY, Y

	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm10, 0 * SIZE(YY)
	movhpd	%xmm10, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm11, 0 * SIZE(YY)
	movhpd	%xmm11, 1 * SIZE(YY)
	addq	INCY, YY
	ALIGN_3

/* Unaligned path: 4-element remainder (bit 2 of M). */
.L55:
	movq	M, %rax
	andq	$4, %rax
	jle	.L56

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm10
	movhpd	1 * SIZE(Y), %xmm10
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm11
	movhpd	1 * SIZE(Y), %xmm11
	addq	INCY, Y

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm14, %xmm4
	mulpd	%xmm14, %xmm6

	addpd	%xmm0, %xmm8
	mulpd	%xmm15, %xmm1
	addpd	%xmm2, %xmm9
	mulpd	%xmm15, %xmm3
	addpd	%xmm4, %xmm10
	mulpd	%xmm15, %xmm5
	addpd	%xmm6, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm10, 0 * SIZE(YY)
	movhpd	%xmm10, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm11, 0 * SIZE(YY)
	movhpd	%xmm11, 1 * SIZE(YY)
	addq	INCY, YY
	ALIGN_3

/* Unaligned path: 2-element remainder (bit 1 of M). */
.L56:
	movq	M, %rax
	andq	$2, %rax
	jle	.L57

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8
	addq	INCY, Y
	movsd	0 * SIZE(Y), %xmm9
	movhpd	1 * SIZE(Y), %xmm9
	addq	INCY, Y

	mulpd	%xmm14, %xmm0
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm1
	mulpd	%xmm15, %xmm3

	addpd	%xmm0, %xmm8
	addpd	%xmm2, %xmm9
	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	addq	INCY, YY
	movsd	%xmm9, 0 * SIZE(YY)
	movhpd	%xmm9, 1 * SIZE(YY)
	addq	INCY, YY
	ALIGN_3

/* Unaligned path: final single element (bit 0 of M); falls through to the exit. */
.L57:
	movq	M, %rax
	andq	$1, %rax
	jle	.L999

	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)

	movsd	0 * SIZE(Y), %xmm8
	movhpd	1 * SIZE(Y), %xmm8

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	addpd	%xmm0, %xmm8
	addpd	%xmm1, %xmm8

	movsd	%xmm8, 0 * SIZE(YY)
	movhpd	%xmm8, 1 * SIZE(YY)
	ALIGN_3

/* Common exit: return 0 in %rax after restoring whatever the prologue saved. */
.L999:
	xorq	%rax, %rax

	RESTOREREGISTERS

	ret

	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -