⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zdot_sse2_opteron.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 2 页
字号:
	mulpd	 1 * SIZE(Y, %rax, 8), %xmm5	mulpd	 3 * SIZE(Y, %rax, 8), %xmm6	mulpd	 3 * SIZE(Y, %rax, 8), %xmm7	subq	$-16, %rax	jge	.L32	ALIGN_3.L31:	PREFETCH	(PREFETCH_SIZE_X + 0) * SIZE(X, %rax, 8)	mulpd	-11 * SIZE(Y, %rax, 8), %xmm8	addpd	 %xmm4, %xmm0	movapd	 -7 * SIZE(X, %rax, 8), %xmm4	mulpd	-11 * SIZE(Y, %rax, 8), %xmm9	addpd	 %xmm5, %xmm1	movlpd	 -8 * SIZE(X, %rax, 8), %xmm5	movhpd	 -5 * SIZE(X, %rax, 8), %xmm5	mulpd	 -9 * SIZE(Y, %rax, 8), %xmm10	addpd	 %xmm6, %xmm2	movapd	 -5 * SIZE(X, %rax, 8), %xmm6	mulpd	 -9 * SIZE(Y, %rax, 8), %xmm11	addpd	 %xmm7, %xmm3	movlpd	 -6 * SIZE(X, %rax, 8), %xmm7	movhpd	 -3 * SIZE(X, %rax, 8), %xmm7	PREFETCH	(PREFETCH_SIZE_Y + 0) * SIZE(Y, %rax, 8)	mulpd	 -7 * SIZE(Y, %rax, 8), %xmm4	addpd	 %xmm8, %xmm0	movapd	 -3 * SIZE(X, %rax, 8), %xmm8	mulpd	 -7 * SIZE(Y, %rax, 8), %xmm5	addpd	 %xmm9, %xmm1	movlpd	 -4 * SIZE(X, %rax, 8), %xmm9	movhpd	 -1 * SIZE(X, %rax, 8), %xmm9	mulpd	 -5 * SIZE(Y, %rax, 8), %xmm6	addpd	 %xmm10, %xmm2	movapd	 -1 * SIZE(X, %rax, 8), %xmm10	mulpd	 -5 * SIZE(Y, %rax, 8), %xmm7	addpd	 %xmm11, %xmm3	movlpd	 -2 * SIZE(X, %rax, 8), %xmm11	movhpd	  1 * SIZE(X, %rax, 8), %xmm11	PREFETCH	(PREFETCH_SIZE_X + 8) * SIZE(X, %rax, 8)	mulpd	 -3 * SIZE(Y, %rax, 8), %xmm8	addpd	 %xmm4, %xmm0	movapd	  1 * SIZE(X, %rax, 8), %xmm4	mulpd	 -3 * SIZE(Y, %rax, 8), %xmm9	addpd	 %xmm5, %xmm1	movlpd	  0 * SIZE(X, %rax, 8), %xmm5	movhpd	  3 * SIZE(X, %rax, 8), %xmm5	mulpd	 -1 * SIZE(Y, %rax, 8), %xmm10	addpd	 %xmm6, %xmm2	movapd	  3 * SIZE(X, %rax, 8), %xmm6	mulpd	 -1 * SIZE(Y, %rax, 8), %xmm11	addpd	 %xmm7, %xmm3	movlpd	  2 * SIZE(X, %rax, 8), %xmm7	movhpd	  5 * SIZE(X, %rax, 8), %xmm7	PREFETCH	(PREFETCH_SIZE_Y + 8) * SIZE(Y, %rax, 8)	mulpd	  1 * SIZE(Y, %rax, 8), %xmm4	addpd	 %xmm8, %xmm0	movapd	  5 * SIZE(X, %rax, 8), %xmm8	mulpd	  1 * SIZE(Y, %rax, 8), %xmm5	addpd	 %xmm9, %xmm1	movlpd	  4 * SIZE(X, %rax, 8), %xmm9	movhpd	  7 * SIZE(X, %rax, 8), %xmm9	mulpd	  3 * SIZE(Y, %rax, 8), %xmm6	addpd	 %xmm10, %xmm2	movapd	 7 * SIZE(X, %rax, 8), %xmm10	mulpd	  3 * SIZE(Y, %rax, 8), %xmm7	addpd	 %xmm11, %xmm3	movlpd	  6 * SIZE(X, %rax, 8), %xmm11	movhpd	  9 * SIZE(X, %rax, 8), %xmm11	subq	$-16, %rax	jl,pt	.L31	ALIGN_3.L32:	mulpd	-11 * SIZE(Y, %rax, 8), %xmm8	addpd	 %xmm4, %xmm0	movapd	 -7 * SIZE(X, %rax, 8), %xmm4	mulpd	-11 * SIZE(Y, %rax, 8), %xmm9	addpd	 %xmm5, %xmm1	movlpd	 -8 * SIZE(X, %rax, 8), %xmm5	movhpd	 -5 * SIZE(X, %rax, 8), %xmm5	mulpd	 -9 * SIZE(Y, %rax, 8), %xmm10	addpd	 %xmm6, %xmm2	movapd	 -5 * SIZE(X, %rax, 8), %xmm6	mulpd	 -9 * SIZE(Y, %rax, 8), %xmm11	addpd	 %xmm7, %xmm3	movlpd	 -6 * SIZE(X, %rax, 8), %xmm7	movhpd	 -3 * SIZE(X, %rax, 8), %xmm7	mulpd	 -7 * SIZE(Y, %rax, 8), %xmm4	addpd	 %xmm8, %xmm0	movapd	 -3 * SIZE(X, %rax, 8), %xmm8	mulpd	 -7 * SIZE(Y, %rax, 8), %xmm5	addpd	 %xmm9, %xmm1	movlpd	 -4 * SIZE(X, %rax, 8), %xmm9	movhpd	 -1 * SIZE(X, %rax, 8), %xmm9	mulpd	 -5 * SIZE(Y, %rax, 8), %xmm6	addpd	 %xmm10, %xmm2	movapd	 -1 * SIZE(X, %rax, 8), %xmm10	mulpd	 -5 * SIZE(Y, %rax, 8), %xmm7	addpd	 %xmm11, %xmm3	movlpd	 -2 * SIZE(X, %rax, 8), %xmm11	movhpd	  1 * SIZE(X, %rax, 8), %xmm11	mulpd	 -3 * SIZE(Y, %rax, 8), %xmm8	addpd	 %xmm4, %xmm0	mulpd	 -3 * SIZE(Y, %rax, 8), %xmm9	addpd	 %xmm5, %xmm1	mulpd	 -1 * SIZE(Y, %rax, 8), %xmm10	addpd	 %xmm6, %xmm2	mulpd	 -1 * SIZE(Y, %rax, 8), %xmm11	addpd	 %xmm7, %xmm3	addpd	 %xmm8, %xmm0	addpd	 %xmm9, %xmm1	addpd	 %xmm10, %xmm2	addpd	 %xmm11, %xmm3	ALIGN_3.L35:	testq	$7, N	je	.L39	movq	N,  %rax	andq	$4, %rax	jle	.L36	movapd	 1 * SIZE(X), %xmm4	movapd	 3 * SIZE(X), %xmm6	movlpd	 0 * SIZE(X), %xmm5	movhpd	 3 * SIZE(X), %xmm5	movlpd	 2 * SIZE(X), %xmm7	movhpd	 5 * SIZE(X), %xmm7	mulpd	 1 * SIZE(Y), %xmm4	mulpd	 1 * SIZE(Y), %xmm5	mulpd	 3 * SIZE(Y), %xmm6	mulpd	 3 * SIZE(Y), %xmm7	addpd	 %xmm4, %xmm0	addpd	 %xmm5, %xmm1	addpd	 %xmm6, %xmm2	addpd	 %xmm7, %xmm3	movapd	 5 * SIZE(X), %xmm8	movapd	 7 * SIZE(X), %xmm10	movlpd	 4 * SIZE(X), %xmm9	movhpd	 7 * SIZE(X), %xmm9	movlpd	 6 * SIZE(X), %xmm11	movhpd	 9 * SIZE(X), %xmm11	mulpd	 5 * SIZE(Y), %xmm8	mulpd	 5 * SIZE(Y), %xmm9	mulpd	 7 * SIZE(Y), %xmm10	mulpd	 7 * SIZE(Y), %xmm11	addpd	 %xmm8, %xmm0	addpd	 %xmm9, %xmm1	addpd	 %xmm10, %xmm2	addpd	 %xmm11, %xmm3	addq	$8 * SIZE, X	addq	$8 * SIZE, Y	ALIGN_3.L36:	movq	N,  %rax	andq	$2, %rax	jle	.L37	movapd	 1 * SIZE(X), %xmm4	movapd	 3 * SIZE(X), %xmm6	movlpd	 0 * SIZE(X), %xmm5	movhpd	 3 * SIZE(X), %xmm5	movlpd	 2 * SIZE(X), %xmm7	movhpd	 5 * SIZE(X), %xmm7	mulpd	 1 * SIZE(Y), %xmm4	mulpd	 1 * SIZE(Y), %xmm5	mulpd	 3 * SIZE(Y), %xmm6	mulpd	 3 * SIZE(Y), %xmm7	addpd	 %xmm4, %xmm0	addpd	 %xmm5, %xmm1	addpd	 %xmm6, %xmm2	addpd	 %xmm7, %xmm3	addq	 $4 * SIZE, X	addq	 $4 * SIZE, Y	ALIGN_3.L37:	movq	N,  %rax	andq	$1, %rax	jle	.L39	movapd	 1 * SIZE(X), %xmm4	movlpd	 0 * SIZE(X), %xmm5	movhpd	 3 * SIZE(X), %xmm5	mulpd	 1 * SIZE(Y), %xmm4	mulpd	 1 * SIZE(Y), %xmm5	addpd	 %xmm4, %xmm0	addpd	 %xmm5, %xmm1	addq	 $2 * SIZE, X	addq	 $2 * SIZE, Y	ALIGN_3	.L39:	movsd	1 * SIZE(X), %xmm4	movsd	0 * SIZE(X), %xmm5	movsd	1 * SIZE(Y), %xmm6	mulsd	%xmm6, %xmm4	mulsd	%xmm6, %xmm5	addsd	%xmm4, %xmm0	addsd	%xmm5, %xmm1	addpd	%xmm0, %xmm2	addpd	%xmm1, %xmm3	movapd	%xmm2,  %xmm0	unpckhpd %xmm2, %xmm2	movapd	%xmm3, %xmm1	unpckhpd %xmm3, %xmm3#ifndef CONJ	subsd	 %xmm0, %xmm2	addsd	 %xmm3, %xmm1	movapd	 %xmm2, %xmm0#else	addsd	 %xmm2, %xmm0	subsd	 %xmm3, %xmm1#endif#if defined(F_INTERFACE) && defined(F_INTERFACE_F2C)	movlpd	%xmm0, 0 * SIZE(RESULT)	movlpd	%xmm1, 1 * SIZE(RESULT)#endif	RESTOREREGISTERS	ret	ALIGN_3.L50:#ifdef F_INTERFACE	testq	INCX, INCX		# if (incx < 0)	jge	.L51	movq	N, %rax		# n	decq	%rax			# n - 1	imulq	INCX, %rax		# (n - 1) * incx	subq	%rax, X	ALIGN_3.L51:	testq	INCY, INCY		# if (incy < 0)	jge	.L52	movq	N, %rax	decq	%rax			# (n - 1)	imulq	INCY, %rax		# (n - 1) * incy	subq	%rax, Y	ALIGN_3.L52:#endif	movq	N,  %rax	sarq	$3, %rax	jle	.L55	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm6	movhpd	 1 * SIZE(X), %xmm6	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm7	movhpd	 1 * SIZE(X), %xmm7	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	mulpd	 %xmm4, %xmm8	mulpd	 %xmm4, %xmm9	mulpd	 %xmm5, %xmm10	mulpd	 %xmm5, %xmm11	MOVDDUP( 0 * SIZE, Y, %xmm12)	MOVDDUP( 1 * SIZE, Y, %xmm13)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm14)	MOVDDUP( 1 * SIZE, Y, %xmm15)	addq	 INCY, Y	decq	%rax	jle	.L54	ALIGN_3.L53:	addpd	 %xmm8, %xmm0	mulpd	 %xmm6, %xmm12	addpd	 %xmm9, %xmm1	mulpd	 %xmm6, %xmm13	addpd	 %xmm10, %xmm2	mulpd	 %xmm7, %xmm14	addpd	 %xmm11, %xmm3	mulpd	 %xmm7, %xmm15	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm6	movhpd	 1 * SIZE(X), %xmm6	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm7	movhpd	 1 * SIZE(X), %xmm7	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	addpd	 %xmm12, %xmm0	mulpd	 %xmm4, %xmm8	addpd	 %xmm13, %xmm1	mulpd	 %xmm4, %xmm9	addpd	 %xmm14, %xmm2	mulpd	 %xmm5, %xmm10	addpd	 %xmm15, %xmm3	mulpd	 %xmm5, %xmm11	MOVDDUP( 0 * SIZE, Y, %xmm12)	MOVDDUP( 1 * SIZE, Y, %xmm13)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm14)	MOVDDUP( 1 * SIZE, Y, %xmm15)	addq	 INCY, Y	addpd	 %xmm8, %xmm0	mulpd	 %xmm6, %xmm12	addpd	 %xmm9, %xmm1	mulpd	 %xmm6, %xmm13	addpd	 %xmm10, %xmm2	mulpd	 %xmm7, %xmm14	addpd	 %xmm11, %xmm3	mulpd	 %xmm7, %xmm15	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm6	movhpd	 1 * SIZE(X), %xmm6	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm7	movhpd	 1 * SIZE(X), %xmm7	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	addpd	 %xmm12, %xmm0	mulpd	 %xmm4, %xmm8	addpd	 %xmm13, %xmm1	mulpd	 %xmm4, %xmm9	addpd	 %xmm14, %xmm2	mulpd	 %xmm5, %xmm10	addpd	 %xmm15, %xmm3	mulpd	 %xmm5, %xmm11	MOVDDUP( 0 * SIZE, Y, %xmm12)	MOVDDUP( 1 * SIZE, Y, %xmm13)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm14)	MOVDDUP( 1 * SIZE, Y, %xmm15)	addq	 INCY, Y	decq	%rax	jg	.L53	ALIGN_3.L54:	addpd	 %xmm8, %xmm0	mulpd	 %xmm6, %xmm12	addpd	 %xmm9, %xmm1	mulpd	 %xmm6, %xmm13	addpd	 %xmm10, %xmm2	mulpd	 %xmm7, %xmm14	addpd	 %xmm11, %xmm3	mulpd	 %xmm7, %xmm15	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm6	movhpd	 1 * SIZE(X), %xmm6	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm7	movhpd	 1 * SIZE(X), %xmm7	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	addpd	 %xmm12, %xmm0	mulpd	 %xmm4, %xmm8	addpd	 %xmm13, %xmm1	mulpd	 %xmm4, %xmm9	addpd	 %xmm14, %xmm2	mulpd	 %xmm5, %xmm10	addpd	 %xmm15, %xmm3	mulpd	 %xmm5, %xmm11	MOVDDUP( 0 * SIZE, Y, %xmm12)	MOVDDUP( 1 * SIZE, Y, %xmm13)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm14)	MOVDDUP( 1 * SIZE, Y, %xmm15)	addq	 INCY, Y	addpd	 %xmm8, %xmm0	mulpd	 %xmm6, %xmm12	addpd	 %xmm9, %xmm1	mulpd	 %xmm6, %xmm13	addpd	 %xmm10, %xmm2	mulpd	 %xmm7, %xmm14	addpd	 %xmm11, %xmm3	mulpd	 %xmm7, %xmm15	addpd	 %xmm12, %xmm0	mulpd	 %xmm4, %xmm8	addpd	 %xmm13, %xmm1	mulpd	 %xmm4, %xmm9	addpd	 %xmm14, %xmm2	mulpd	 %xmm5, %xmm10	addpd	 %xmm15, %xmm3	mulpd	 %xmm5, %xmm11	ALIGN_3.L55:	testq	$7, N	je	.L999	movq	N,  %rax	andq	$4, %rax	jle	.L56	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm6	movhpd	 1 * SIZE(X), %xmm6	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm7	movhpd	 1 * SIZE(X), %xmm7	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	mulpd	 %xmm4, %xmm8	mulpd	 %xmm4, %xmm9	mulpd	 %xmm5, %xmm10	mulpd	 %xmm5, %xmm11	MOVDDUP( 0 * SIZE, Y, %xmm12)	MOVDDUP( 1 * SIZE, Y, %xmm13)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm14)	MOVDDUP( 1 * SIZE, Y, %xmm15)	addq	 INCY, Y	addpd	 %xmm8, %xmm0	mulpd	 %xmm6, %xmm12	addpd	 %xmm9, %xmm1	mulpd	 %xmm6, %xmm13	addpd	 %xmm10, %xmm2	mulpd	 %xmm7, %xmm14	addpd	 %xmm11, %xmm3	mulpd	 %xmm7, %xmm15	addpd	 %xmm12, %xmm0	addpd	 %xmm13, %xmm1	addpd	 %xmm14, %xmm2	addpd	 %xmm15, %xmm3	ALIGN_3.L56:	movq	N,  %rax	andq	$2, %rax	jle	.L57	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	movlpd	 0 * SIZE(X), %xmm5	movhpd	 1 * SIZE(X), %xmm5	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	addq	 INCY, Y	MOVDDUP( 0 * SIZE, Y, %xmm10)	MOVDDUP( 1 * SIZE, Y, %xmm11)	addq	 INCY, Y	mulpd	 %xmm4, %xmm8	mulpd	 %xmm4, %xmm9	mulpd	 %xmm5, %xmm10	mulpd	 %xmm5, %xmm11	addpd	 %xmm8, %xmm0	addpd	 %xmm9, %xmm1	addpd	 %xmm10, %xmm2	addpd	 %xmm11, %xmm3	ALIGN_3.L57:	movq	N,  %rax	andq	$1, %rax	jle	.L999	movlpd	 0 * SIZE(X), %xmm4	movhpd	 1 * SIZE(X), %xmm4	addq	 INCX, X	MOVDDUP( 0 * SIZE, Y, %xmm8)	MOVDDUP( 1 * SIZE, Y, %xmm9)	mulpd	 %xmm4, %xmm8	mulpd	 %xmm4, %xmm9	addpd	 %xmm8, %xmm0	addpd	 %xmm9, %xmm1	ALIGN_3.L999:	addpd	%xmm0, %xmm2	addpd	%xmm1, %xmm3	movapd	%xmm2,  %xmm0	unpckhpd %xmm2, %xmm2	movapd	%xmm3, %xmm1	unpckhpd %xmm3, %xmm3#ifndef CONJ	subsd	 %xmm3, %xmm0	addsd	 %xmm2, %xmm1#else	addsd	 %xmm3, %xmm0	subsd	 %xmm2, %xmm1#endif#if defined(F_INTERFACE) && defined(F_INTERFACE_F2C)	movlpd	%xmm0, 0 * SIZE(RESULT)	movlpd	%xmm1, 1 * SIZE(RESULT)#endif	RESTOREREGISTERS	ret	EPILOGUE

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -