⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zaxpy_sse2_core2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 3 页
字号:
	mulpd	%xmm15, %xmm5	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	%xmm8,   0 * SIZE(Y)	movapd	%xmm9,   2 * SIZE(Y)	movapd	%xmm10,  4 * SIZE(Y)	movapd	%xmm11,  6 * SIZE(Y)	movsd	 8 * SIZE(X), %xmm0	movhpd	 9 * SIZE(X), %xmm0	movsd	10 * SIZE(X), %xmm2	movhpd	11 * SIZE(X), %xmm2	movsd	12 * SIZE(X), %xmm4	movhpd	13 * SIZE(X), %xmm4	movsd	14 * SIZE(X), %xmm6	movhpd	15 * SIZE(X), %xmm6	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	movapd	 8 * SIZE(Y), %xmm8	movapd	10 * SIZE(Y), %xmm9	movapd	12 * SIZE(Y), %xmm10	movapd	14 * SIZE(Y), %xmm11	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	addpd	%xmm0, %xmm8	addpd	%xmm2, %xmm9	addpd	%xmm4, %xmm10	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm1	mulpd	%xmm15, %xmm3	mulpd	%xmm15, %xmm5	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	%xmm8,   8 * SIZE(Y)	movapd	%xmm9,  10 * SIZE(Y)	movapd	%xmm10, 12 * SIZE(Y)	movapd	%xmm11, 14 * SIZE(Y)	subq	$-16 * SIZE, X	subq	$-16 * SIZE, Y	ALIGN_3.L25:	movq	M,  %rax	andq	$4, %rax	jle	.L26	movsd	 0 * SIZE(X), %xmm0	movhpd	 1 * SIZE(X), %xmm0	movsd	 2 * SIZE(X), %xmm2	movhpd	 3 * SIZE(X), %xmm2	movsd	 4 * SIZE(X), %xmm4	movhpd	 5 * SIZE(X), %xmm4	movsd	 6 * SIZE(X), %xmm6	movhpd	 7 * SIZE(X), %xmm6	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	movapd	 0 * SIZE(Y), %xmm8	movapd	 2 * SIZE(Y), %xmm9	movapd	 4 * SIZE(Y), %xmm10	movapd	 6 * SIZE(Y), %xmm11	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	addpd	%xmm0, %xmm8	addpd	%xmm2, %xmm9	addpd	%xmm4, %xmm10	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm1	mulpd	%xmm15, %xmm3	mulpd	%xmm15, %xmm5	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	%xmm8,   0 * SIZE(Y)	movapd	%xmm9,   2 * SIZE(Y)	movapd	%xmm10,  4 * SIZE(Y)	movapd	%xmm11,  6 * SIZE(Y)	addq	$8 * SIZE, X	addq	$8 * SIZE, Y	ALIGN_3.L26:	movq	M,  %rax	andq	$2, %rax	jle	.L27	movsd	 0 * SIZE(X), %xmm0	movhpd	 1 * SIZE(X), %xmm0	movsd	 2 * SIZE(X), %xmm2	movhpd	 3 * SIZE(X), %xmm2	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	movapd	 0 * SIZE(Y), %xmm8	movapd	 2 * SIZE(Y), %xmm9	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm15, %xmm1	mulpd	%xmm15, %xmm3	addpd	%xmm0, %xmm8	addpd	%xmm2, %xmm9	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	movapd	%xmm8,   0 * SIZE(Y)	movapd	%xmm9,   2 * SIZE(Y)	addq	$4 * SIZE, X	addq	$4 * SIZE, Y	ALIGN_3.L27:	movq	M,  %rax	andq	$1, %rax	jle	.L999	movsd	 0 * SIZE(X), %xmm0	movhpd	 1 * SIZE(X), %xmm0	pshufd	 $0x4e, %xmm0, %xmm1		movapd	 0 * SIZE(Y), %xmm8	mulpd	%xmm14, %xmm0	mulpd	%xmm15, %xmm1	addpd	%xmm0, %xmm8	addpd	%xmm1, %xmm8	movapd	%xmm8,   0 * SIZE(Y)	jmp	.L999	ALIGN_3.L30:	testq	$SIZE, X	jne	.L40#ifndef CONJ	movddup	 %xmm0,  %xmm14	pxor	 %xmm15, %xmm15	subsd	 %xmm1,  %xmm15	unpcklpd %xmm1,  %xmm15#else	movddup	 %xmm1,  %xmm15	movapd	 %xmm0,  %xmm14	pxor	 %xmm13, %xmm13	subsd	 %xmm0,  %xmm13	unpcklpd %xmm13, %xmm14#endif	movq	Y, YY	movq	M,  %rax	sarq	$3, %rax	jle	.L35	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	movapd	 (X), %xmm4	addq	INCX, X	movapd	 (X), %xmm6	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	decq	%rax	jle	.L32	ALIGN_3.L31:	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	movapd	 (X), %xmm4	addq	INCX, X	movapd	 (X), %xmm6	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	movapd	%xmm10,  (YY)	addq	 INCY, YY	movapd	%xmm11,  (YY)	addq	 INCY, YY	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	movapd	 (X), %xmm4	addq	INCX, X	movapd	 (X), %xmm6	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	movapd	%xmm10,  (YY)	addq	 INCY, YY	movapd	%xmm11,  (YY)	addq	 INCY, YY	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	decq	%rax	jg	.L31	ALIGN_3.L32:	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	movapd	 (X), %xmm4	addq	INCX, X	movapd	 (X), %xmm6	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	movapd	%xmm10,  (YY)	addq	 INCY, YY	movapd	%xmm11,  (YY)	addq	 INCY, YY	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	movapd	%xmm10,  (YY)	addq	 INCY, YY	movapd	%xmm11,  (YY)	addq	 INCY, YY	ALIGN_3.L35:	movq	M,  %rax	andq	$4, %rax	jle	.L36	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	movapd	 (X), %xmm4	addq	INCX, X	movapd	 (X), %xmm6	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	pshufd	 $0x4e, %xmm4, %xmm5	pshufd	 $0x4e, %xmm6, %xmm7	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	movapd	%xmm8,  (YY)	addq	 INCY, YY	movapd	%xmm9,  (YY)	addq	 INCY, YY	movapd	%xmm10, (YY)	addq	 INCY, YY	movapd	%xmm11, (YY)	addq	 INCY, YY	ALIGN_3.L36:	movq	M,  %rax	andq	$2, %rax	jle	.L37	movapd	 (X), %xmm0	addq	INCX, X	movapd	 (X), %xmm2	addq	INCX, X	pshufd	 $0x4e, %xmm0, %xmm1	pshufd	 $0x4e, %xmm2, %xmm3	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm15, %xmm1	mulpd	%xmm15, %xmm3	addpd	%xmm0, %xmm8	addpd	%xmm2, %xmm9	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	ALIGN_3.L37:	movq	M,  %rax	andq	$1, %rax	jle	.L999	movapd	 (X), %xmm0	pshufd	 $0x4e, %xmm0, %xmm1	movapd	 (Y), %xmm8	mulpd	%xmm14, %xmm0	mulpd	%xmm15, %xmm1	addpd	%xmm0, %xmm8	addpd	%xmm1, %xmm8	movapd	%xmm8,   (YY)	jmp	.L999	ALIGN_3.L40:	movq	Y, YY	movq	M,  %rax	sarq	$3, %rax	jle	.L45	MOVDDUP( 0 * SIZE, X, %xmm0)	MOVDDUP( 1 * SIZE, X, %xmm1)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm2)	MOVDDUP( 1 * SIZE, X, %xmm3)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm4)	MOVDDUP( 1 * SIZE, X, %xmm5)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm6)	MOVDDUP( 1 * SIZE, X, %xmm7)	addq	 INCX, X	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	decq	%rax	jle	.L42	ALIGN_3.L41:	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7	addpd	%xmm1, %xmm8	addpd	%xmm3, %xmm9	addpd	%xmm5, %xmm10	addpd	%xmm7, %xmm11	MOVDDUP( 0 * SIZE, X, %xmm0)	MOVDDUP( 1 * SIZE, X, %xmm1)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm2)	MOVDDUP( 1 * SIZE, X, %xmm3)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm4)	MOVDDUP( 1 * SIZE, X, %xmm5)	addq	 INCX, X	MOVDDUP( 0 * SIZE, X, %xmm6)	MOVDDUP( 1 * SIZE, X, %xmm7)	addq	 INCX, X	mulpd	%xmm14, %xmm0	mulpd	%xmm14, %xmm2	mulpd	%xmm14, %xmm4	mulpd	%xmm14, %xmm6	movapd	%xmm8,   (YY)	addq	 INCY, YY	movapd	%xmm9,   (YY)	addq	 INCY, YY	movapd	%xmm10,  (YY)	addq	 INCY, YY	movapd	%xmm11,  (YY)	addq	 INCY, YY	movapd	 (Y), %xmm8	addq	 INCY, Y	movapd	 (Y), %xmm9	addq	 INCY, Y	movapd	 (Y), %xmm10	addq	 INCY, Y	movapd	 (Y), %xmm11	addq	 INCY, Y	addpd	%xmm0, %xmm8	mulpd	%xmm15, %xmm1	addpd	%xmm2, %xmm9	mulpd	%xmm15, %xmm3	addpd	%xmm4, %xmm10	mulpd	%xmm15, %xmm5	addpd	%xmm6, %xmm11	mulpd	%xmm15, %xmm7

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -