⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gemv_n_sse2.s

📁 Optimized GotoBLAS libraries
💻 S
📖 第 1 页 / 共 4 页
字号:
	mulsd	STACK_ALPHA, %xmm0	mulsd	STACK_ALPHA, %xmm1	mulsd	STACK_ALPHA, %xmm2	mulsd	STACK_ALPHA, %xmm3	unpcklpd %xmm0, %xmm0	unpcklpd %xmm1, %xmm1	unpcklpd %xmm2, %xmm2	unpcklpd %xmm3, %xmm3	ALIGN_3	testq	$SIZE, A	je	.L42	movsd	 0 * SIZE(Y1), %xmm4	movsd	 0 * SIZE(A1), %xmm8	movsd	 0 * SIZE(A2), %xmm9	movsd	 0 * SIZE(A1, LDA, 2), %xmm10	movsd	 0 * SIZE(A2, LDA, 2), %xmm11	mulsd	 %xmm0, %xmm8	mulsd	 %xmm1, %xmm9	mulsd	 %xmm2, %xmm10	mulsd	 %xmm3, %xmm11	addsd	 %xmm8,  %xmm4	addsd	 %xmm9,  %xmm4	addsd	 %xmm10, %xmm4	addsd	 %xmm11, %xmm4	movsd	 %xmm4, 0 * SIZE(Y1)	addq	 $1 * SIZE, A1	addq	 $1 * SIZE, A2	addq	 $1 * SIZE, Y1	ALIGN_3.L42:	movq	MM,  I	sarq	$4, I	jle	.L45	movapd	 0 * SIZE(A1), %xmm8	movapd	 2 * SIZE(A1), %xmm9	movapd	 4 * SIZE(A1), %xmm10	movapd	 6 * SIZE(A1), %xmm11	movsd	 0 * SIZE(A2), %xmm12	movhpd	 1 * SIZE(A2), %xmm12	movsd	 2 * SIZE(A2), %xmm13	movhpd	 3 * SIZE(A2), %xmm13	movsd	 4 * SIZE(A2), %xmm14	movhpd	 5 * SIZE(A2), %xmm14	movsd	 6 * SIZE(A2), %xmm15	movhpd	 7 * SIZE(A2), %xmm15	movapd	 0 * SIZE(Y1), %xmm4	movapd	 2 * SIZE(Y1), %xmm5	movapd	 4 * SIZE(Y1), %xmm6	movapd	 6 * SIZE(Y1), %xmm7	mulpd	 %xmm0, %xmm8	mulpd	 %xmm0, %xmm9	mulpd	 %xmm0, %xmm10	mulpd	 %xmm0, %xmm11	decq	 I	jle	 .L44	ALIGN_3.L43:	PREFETCH	PREFETCHSIZE * SIZE(A1)	addpd	 %xmm8,  %xmm4	movapd	 0 * SIZE(A1, LDA, 2), %xmm8	mulpd	 %xmm1, %xmm12	PREFETCH	PREFETCHSIZE * SIZE(Y1)	addpd	 %xmm9,  %xmm5	movapd	 2 * SIZE(A1, LDA, 2), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	 4 * SIZE(A1, LDA, 2), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	 6 * SIZE(A1, LDA, 2), %xmm11	mulpd	 %xmm1, %xmm15	addpd	 %xmm12, %xmm4	movsd	 0 * SIZE(A2, LDA, 2), %xmm12	movhpd	 1 * SIZE(A2, LDA, 2), %xmm12	mulpd	 %xmm2, %xmm8	addpd	 %xmm13, %xmm5	movsd	 2 * SIZE(A2, LDA, 2), %xmm13	movhpd	 3 * SIZE(A2, LDA, 2), %xmm13	mulpd	 %xmm2, %xmm9	addpd	 %xmm14, %xmm6	movsd	 4 * SIZE(A2, LDA, 2), %xmm14	movhpd	 5 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm2, %xmm10	addpd	 %xmm15, %xmm7	movsd	 6 * SIZE(A2, LDA, 2), %xmm15	movhpd	 7 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm2, %xmm11	PREFETCH	PREFETCHSIZE * SIZE(A2)	addpd	 %xmm8,  %xmm4	movapd	 8 * SIZE(A1), %xmm8	mulpd	 %xmm3, %xmm12	addpd	 %xmm9,  %xmm5	movapd	10 * SIZE(A1), %xmm9	mulpd	 %xmm3, %xmm13	addpd	 %xmm10, %xmm6	movapd	12 * SIZE(A1), %xmm10	mulpd	 %xmm3, %xmm14	addpd	 %xmm11, %xmm7	movapd	14 * SIZE(A1), %xmm11	mulpd	 %xmm3, %xmm15	addpd	 %xmm12, %xmm4	movsd	 8 * SIZE(A2), %xmm12	movhpd	 9 * SIZE(A2), %xmm12	mulpd	 %xmm0, %xmm8	addpd	 %xmm13, %xmm5	movsd	10 * SIZE(A2), %xmm13	movhpd	11 * SIZE(A2), %xmm13	mulpd	 %xmm0, %xmm9	addpd	 %xmm14, %xmm6	movsd	12 * SIZE(A2), %xmm14	movhpd	13 * SIZE(A2), %xmm14	mulpd	 %xmm0, %xmm10	addpd	 %xmm15, %xmm7	movsd	14 * SIZE(A2), %xmm15	movhpd	15 * SIZE(A2), %xmm15	mulpd	 %xmm0, %xmm11	movapd	 %xmm4, 0 * SIZE(Y1)	movapd	 8 * SIZE(Y1), %xmm4	movapd	 %xmm5, 2 * SIZE(Y1)	movapd	10 * SIZE(Y1), %xmm5	movapd	 %xmm6, 4 * SIZE(Y1)	movapd	12 * SIZE(Y1), %xmm6	movapd	 %xmm7, 6 * SIZE(Y1)	movapd	14 * SIZE(Y1), %xmm7	PREFETCH	PREFETCHSIZE * SIZE(A1, LDA, 2)	addpd	 %xmm8,  %xmm4	movapd	 8 * SIZE(A1, LDA, 2), %xmm8	mulpd	 %xmm1, %xmm12	addpd	 %xmm9,  %xmm5	movapd	10 * SIZE(A1, LDA, 2), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	12 * SIZE(A1, LDA, 2), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	14 * SIZE(A1, LDA, 2), %xmm11	mulpd	 %xmm1, %xmm15	addpd	 %xmm12, %xmm4	movsd	 8 * SIZE(A2, LDA, 2), %xmm12	movhpd	 9 * SIZE(A2, LDA, 2), %xmm12	mulpd	 %xmm2, %xmm8	addpd	 %xmm13, %xmm5	movsd	10 * SIZE(A2, LDA, 2), %xmm13	movhpd	11 * SIZE(A2, LDA, 2), %xmm13	mulpd	 %xmm2, %xmm9	addpd	 %xmm14, %xmm6	movsd	12 * SIZE(A2, LDA, 2), %xmm14	movhpd	13 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm2, %xmm10	addpd	 %xmm15, %xmm7	movsd	14 * SIZE(A2, LDA, 2), %xmm15	movhpd	15 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm2, %xmm11	PREFETCH	PREFETCHSIZE * SIZE(A2, LDA, 2)	addpd	 %xmm8,  %xmm4	movapd	16 * SIZE(A1), %xmm8	mulpd	 %xmm3, %xmm12	addpd	 %xmm9,  %xmm5	movapd	18 * SIZE(A1), %xmm9	mulpd	 %xmm3, %xmm13	addpd	 %xmm10, %xmm6	movapd	20 * SIZE(A1), %xmm10	mulpd	 %xmm3, %xmm14	addpd	 %xmm11, %xmm7	movapd	22 * SIZE(A1), %xmm11	mulpd	 %xmm3, %xmm15	addpd	 %xmm12, %xmm4	movsd	16 * SIZE(A2), %xmm12	movhpd	17 * SIZE(A2), %xmm12	mulpd	 %xmm0, %xmm8	addpd	 %xmm13, %xmm5	movsd	18 * SIZE(A2), %xmm13	movhpd	19 * SIZE(A2), %xmm13	mulpd	 %xmm0, %xmm9	addpd	 %xmm14, %xmm6	movsd	20 * SIZE(A2), %xmm14	movhpd	21 * SIZE(A2), %xmm14	mulpd	 %xmm0, %xmm10	addpd	 %xmm15, %xmm7	movsd	22 * SIZE(A2), %xmm15	movhpd	23 * SIZE(A2), %xmm15	mulpd	 %xmm0, %xmm11	movapd	 %xmm4,  8 * SIZE(Y1)	movapd	16 * SIZE(Y1), %xmm4	movapd	 %xmm5, 10 * SIZE(Y1)	movapd	18 * SIZE(Y1), %xmm5	movapd	 %xmm6, 12 * SIZE(Y1)	movapd	20 * SIZE(Y1), %xmm6	movapd	 %xmm7, 14 * SIZE(Y1)	movapd	22 * SIZE(Y1), %xmm7	addq	 $16 * SIZE, A1	addq	 $16 * SIZE, A2	addq	 $16 * SIZE, Y1	decq	 I	jg	.L43	ALIGN_3.L44:	addpd	 %xmm8,  %xmm4	movapd	 0 * SIZE(A1, LDA, 2), %xmm8	mulpd	 %xmm1, %xmm12	addpd	 %xmm9,  %xmm5	movapd	 2 * SIZE(A1, LDA, 2), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	 4 * SIZE(A1, LDA, 2), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	 6 * SIZE(A1, LDA, 2), %xmm11	mulpd	 %xmm1, %xmm15	addpd	 %xmm12, %xmm4	movsd	 0 * SIZE(A2, LDA, 2), %xmm12	movhpd	 1 * SIZE(A2, LDA, 2), %xmm12	mulpd	 %xmm2, %xmm8	addpd	 %xmm13, %xmm5	movsd	 2 * SIZE(A2, LDA, 2), %xmm13	movhpd	 3 * SIZE(A2, LDA, 2), %xmm13	mulpd	 %xmm2, %xmm9	addpd	 %xmm14, %xmm6	movsd	 4 * SIZE(A2, LDA, 2), %xmm14	movhpd	 5 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm2, %xmm10	addpd	 %xmm15, %xmm7	movsd	 6 * SIZE(A2, LDA, 2), %xmm15	movhpd	 7 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm2, %xmm11	addpd	 %xmm8,  %xmm4	movapd	 8 * SIZE(A1), %xmm8	mulpd	 %xmm3, %xmm12	addpd	 %xmm9,  %xmm5	movapd	10 * SIZE(A1), %xmm9	mulpd	 %xmm3, %xmm13	addpd	 %xmm10, %xmm6	movapd	12 * SIZE(A1), %xmm10	mulpd	 %xmm3, %xmm14	addpd	 %xmm11, %xmm7	movapd	14 * SIZE(A1), %xmm11	mulpd	 %xmm3, %xmm15	addpd	 %xmm12, %xmm4	movsd	 8 * SIZE(A2), %xmm12	movhpd	 9 * SIZE(A2), %xmm12	mulpd	 %xmm0, %xmm8	addpd	 %xmm13, %xmm5	movsd	10 * SIZE(A2), %xmm13	movhpd	11 * SIZE(A2), %xmm13	mulpd	 %xmm0, %xmm9	addpd	 %xmm14, %xmm6	movsd	12 * SIZE(A2), %xmm14	movhpd	13 * SIZE(A2), %xmm14	mulpd	 %xmm0, %xmm10	addpd	 %xmm15, %xmm7	movsd	14 * SIZE(A2), %xmm15	movhpd	15 * SIZE(A2), %xmm15	mulpd	 %xmm0, %xmm11	movapd	 %xmm4, 0 * SIZE(Y1)	movapd	 8 * SIZE(Y1), %xmm4	movapd	 %xmm5, 2 * SIZE(Y1)	movapd	10 * SIZE(Y1), %xmm5	movapd	 %xmm6, 4 * SIZE(Y1)	movapd	12 * SIZE(Y1), %xmm6	movapd	 %xmm7, 6 * SIZE(Y1)	movapd	14 * SIZE(Y1), %xmm7	addpd	 %xmm8,  %xmm4	movapd	 8 * SIZE(A1, LDA, 2), %xmm8	mulpd	 %xmm1, %xmm12	addpd	 %xmm9,  %xmm5	movapd	10 * SIZE(A1, LDA, 2), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	12 * SIZE(A1, LDA, 2), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	14 * SIZE(A1, LDA, 2), %xmm11	mulpd	 %xmm1, %xmm15	addpd	 %xmm12, %xmm4	movsd	 8 * SIZE(A2, LDA, 2), %xmm12	movhpd	 9 * SIZE(A2, LDA, 2), %xmm12	mulpd	 %xmm2, %xmm8	addpd	 %xmm13, %xmm5	movsd	10 * SIZE(A2, LDA, 2), %xmm13	movhpd	11 * SIZE(A2, LDA, 2), %xmm13	mulpd	 %xmm2, %xmm9	addpd	 %xmm14, %xmm6	movsd	12 * SIZE(A2, LDA, 2), %xmm14	movhpd	13 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm2, %xmm10	addpd	 %xmm15, %xmm7	movsd	14 * SIZE(A2, LDA, 2), %xmm15	movhpd	15 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm2, %xmm11	addpd	 %xmm8,  %xmm4	mulpd	 %xmm3, %xmm12	addpd	 %xmm9,  %xmm5	mulpd	 %xmm3, %xmm13	addpd	 %xmm10, %xmm6	mulpd	 %xmm3, %xmm14	addpd	 %xmm11, %xmm7	mulpd	 %xmm3, %xmm15	addpd	 %xmm12, %xmm4	addpd	 %xmm13, %xmm5	addpd	 %xmm14, %xmm6	addpd	 %xmm15, %xmm7	movapd	 %xmm4,  8 * SIZE(Y1)	movapd	 %xmm5, 10 * SIZE(Y1)	movapd	 %xmm6, 12 * SIZE(Y1)	movapd	 %xmm7, 14 * SIZE(Y1)	addq	 $16 * SIZE, A1	addq	 $16 * SIZE, A2	addq	 $16 * SIZE, Y1	ALIGN_3.L45:	testq	$8, MM	je	.L46	movapd	 0 * SIZE(Y1), %xmm4	movapd	 2 * SIZE(Y1), %xmm5	movapd	 4 * SIZE(Y1), %xmm6	movapd	 6 * SIZE(Y1), %xmm7	movapd	 0 * SIZE(A1), %xmm8	movapd	 2 * SIZE(A1), %xmm9	movapd	 4 * SIZE(A1), %xmm10	movapd	 6 * SIZE(A1), %xmm11	movsd	 0 * SIZE(A2), %xmm12	movhpd	 1 * SIZE(A2), %xmm12	movsd	 2 * SIZE(A2), %xmm13	movhpd	 3 * SIZE(A2), %xmm13	movsd	 4 * SIZE(A2), %xmm14	movhpd	 5 * SIZE(A2), %xmm14	movsd	 6 * SIZE(A2), %xmm15	movhpd	 7 * SIZE(A2), %xmm15	mulpd	 %xmm0, %xmm8	mulpd	 %xmm0, %xmm9	mulpd	 %xmm0, %xmm10	mulpd	 %xmm0, %xmm11	addpd	 %xmm8,  %xmm4	movapd	 0 * SIZE(A1, LDA, 2), %xmm8	mulpd	 %xmm1, %xmm12	addpd	 %xmm9,  %xmm5	movapd	 2 * SIZE(A1, LDA, 2), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	 4 * SIZE(A1, LDA, 2), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	 6 * SIZE(A1, LDA, 2), %xmm11	mulpd	 %xmm1, %xmm15	addpd	 %xmm12, %xmm4	movsd	 0 * SIZE(A2, LDA, 2), %xmm12	movhpd	 1 * SIZE(A2, LDA, 2), %xmm12	mulpd	 %xmm2, %xmm8	addpd	 %xmm13, %xmm5	movsd	 2 * SIZE(A2, LDA, 2), %xmm13	movhpd	 3 * SIZE(A2, LDA, 2), %xmm13	mulpd	 %xmm2, %xmm9	addpd	 %xmm14, %xmm6	movsd	 4 * SIZE(A2, LDA, 2), %xmm14	movhpd	 5 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm2, %xmm10	addpd	 %xmm15, %xmm7	movsd	 6 * SIZE(A2, LDA, 2), %xmm15	movhpd	 7 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm2, %xmm11	addpd	 %xmm8,  %xmm4	mulpd	 %xmm3, %xmm12	addpd	 %xmm9,  %xmm5	mulpd	 %xmm3, %xmm13	addpd	 %xmm10, %xmm6	mulpd	 %xmm3, %xmm14	addpd	 %xmm11, %xmm7	mulpd	 %xmm3, %xmm15	addpd	 %xmm12, %xmm4	mulpd	 %xmm0, %xmm8	addpd	 %xmm13, %xmm5	mulpd	 %xmm0, %xmm9	addpd	 %xmm14, %xmm6	mulpd	 %xmm0, %xmm10	addpd	 %xmm15, %xmm7	mulpd	 %xmm0, %xmm11	movapd	 %xmm4, 0 * SIZE(Y1)	movapd	 %xmm5, 2 * SIZE(Y1)	movapd	 %xmm6, 4 * SIZE(Y1)	movapd	 %xmm7, 6 * SIZE(Y1)	addq	 $8 * SIZE, A1	addq	 $8 * SIZE, A2	addq	 $8 * SIZE, Y1	ALIGN_3.L46:	testq	$4, MM	je	.L47	movapd	 0 * SIZE(Y1), %xmm4	movapd	 2 * SIZE(Y1), %xmm5	movapd	 0 * SIZE(A1), %xmm8	movapd	 2 * SIZE(A1), %xmm9	movsd	 0 * SIZE(A2), %xmm10	movhpd	 1 * SIZE(A2), %xmm10	movsd	 2 * SIZE(A2), %xmm11	movhpd	 3 * SIZE(A2), %xmm11	movapd	 0 * SIZE(A1, LDA, 2), %xmm12	movapd	 2 * SIZE(A1, LDA, 2), %xmm13	movsd	 0 * SIZE(A2, LDA, 2), %xmm14	movhpd	 1 * SIZE(A2, LDA, 2), %xmm14	movsd	 2 * SIZE(A2, LDA, 2), %xmm15	movhpd	 3 * SIZE(A2, LDA, 2), %xmm15	mulpd	 %xmm0, %xmm8	mulpd	 %xmm0, %xmm9	mulpd	 %xmm1, %xmm10	mulpd	 %xmm1, %xmm11	mulpd	 %xmm2, %xmm12	mulpd	 %xmm2, %xmm13	mulpd	 %xmm3, %xmm14	mulpd	 %xmm3, %xmm15	addpd	 %xmm8,  %xmm4	addpd	 %xmm9,  %xmm5	addpd	 %xmm10, %xmm4	addpd	 %xmm11, %xmm5	addpd	 %xmm12, %xmm4	addpd	 %xmm13, %xmm5	addpd	 %xmm14, %xmm4	addpd	 %xmm15, %xmm5	movapd	 %xmm4, 0 * SIZE(Y1)	movapd	 %xmm5, 2 * SIZE(Y1)	addq	 $4 * SIZE, A1	addq	 $4 * SIZE, A2	addq	 $4 * SIZE, Y1	ALIGN_3.L47:	testq	$2, MM	je	.L48	movapd	 0 * SIZE(Y1), %xmm4	movapd	 0 * SIZE(A1), %xmm8	movsd	 0 * SIZE(A2), %xmm10	movhpd	 1 * SIZE(A2), %xmm10	movapd	 0 * SIZE(A1, LDA, 2), %xmm12	movsd	 0 * SIZE(A2, LDA, 2), %xmm14	movhpd	 1 * SIZE(A2, LDA, 2), %xmm14	mulpd	 %xmm0, %xmm8	mulpd	 %xmm1, %xmm10	mulpd	 %xmm2, %xmm12	mulpd	 %xmm3, %xmm14	addpd	 %xmm8,  %xmm4	addpd	 %xmm10, %xmm4	addpd	 %xmm12, %xmm4	addpd	 %xmm14, %xmm4	movapd	 %xmm4, 0 * SIZE(Y1)	addq	 $2 * SIZE, A1	addq	 $2 * SIZE, A2	addq	 $2 * SIZE, Y1	ALIGN_3.L48:	testq	$1, MM	je	.L49	movsd	 0 * SIZE(Y1), %xmm4	movsd	 0 * SIZE(A1), %xmm8	movsd	 0 * SIZE(A2), %xmm9	movsd	 0 * SIZE(A1, LDA, 2), %xmm10	movsd	 0 * SIZE(A2, LDA, 2), %xmm11	mulsd	 %xmm0, %xmm8	mulsd	 %xmm1, %xmm9	mulsd	 %xmm2, %xmm10	mulsd	 %xmm3, %xmm11	addsd	 %xmm8,  %xmm4	addsd	 %xmm9,  %xmm4	addsd	 %xmm10, %xmm4	addsd	 %xmm11, %xmm4	movsd	 %xmm4, 0 * SIZE(Y1)	addq	 $1 * SIZE, A1	addq	 $1 * SIZE, A2	addq	 $1 * SIZE, Y1	ALIGN_3.L49:	decq	J	jg	.L41	ALIGN_3.L50:	testq	$2, N	je	.L60	movq	BUFFER, Y1	movq	A,  A1	leaq	(A, LDA, 1), A2	leaq	(A, LDA, 2), A	movsd	(X), %xmm0	addq	INCX, X	movsd	(X), %xmm1	addq	INCX, X	mulsd	STACK_ALPHA, %xmm0	mulsd	STACK_ALPHA, %xmm1	unpcklpd %xmm0, %xmm0	unpcklpd %xmm1, %xmm1	testq	$SIZE, A	je	.L52	movsd	 (Y1), %xmm4	movsd	 (A1), %xmm8	movsd	 (A2), %xmm9	mulsd	 %xmm0, %xmm8	mulsd	 %xmm1, %xmm9	addsd	 %xmm8,  %xmm4	addsd	 %xmm9,  %xmm4	movsd	 %xmm4, 0 * SIZE(Y1)	addq	 $1 * SIZE, A1	addq	 $1 * SIZE, A2	addq	 $1 * SIZE, Y1	ALIGN_3.L52:	movq	MM,  I	sarq	$4, I	jle	.L55	movapd	 0 * SIZE(A1), %xmm8	movapd	 2 * SIZE(A1), %xmm9	movapd	 4 * SIZE(A1), %xmm10	movapd	 6 * SIZE(A1), %xmm11	movsd	 0 * SIZE(A2), %xmm12	movhpd	 1 * SIZE(A2), %xmm12	movsd	 2 * SIZE(A2), %xmm13	movhpd	 3 * SIZE(A2), %xmm13	movsd	 4 * SIZE(A2), %xmm14	movhpd	 5 * SIZE(A2), %xmm14	movsd	 6 * SIZE(A2), %xmm15	movhpd	 7 * SIZE(A2), %xmm15	movapd	 0 * SIZE(Y1), %xmm4	movapd	 2 * SIZE(Y1), %xmm5	movapd	 4 * SIZE(Y1), %xmm6	movapd	 6 * SIZE(Y1), %xmm7	mulpd	 %xmm0, %xmm8	mulpd	 %xmm0, %xmm9	mulpd	 %xmm0, %xmm10	mulpd	 %xmm0, %xmm11	decq	 I	jle	 .L54	ALIGN_3.L53:	PREFETCH	PREFETCHSIZE * SIZE(A1)	addpd	 %xmm8,  %xmm4	movapd	 8 * SIZE(A1), %xmm8	mulpd	 %xmm1, %xmm12	addpd	 %xmm9,  %xmm5	movapd	10 * SIZE(A1), %xmm9	mulpd	 %xmm1, %xmm13	addpd	 %xmm10, %xmm6	movapd	12 * SIZE(A1), %xmm10	mulpd	 %xmm1, %xmm14	addpd	 %xmm11, %xmm7	movapd	14 * SIZE(A1), %xmm11	mulpd	 %xmm1, %xmm15	PREFETCH	PREFETCHSIZE * SIZE(Y1)	addpd	 %xmm12, %xmm4	movsd	 8 * SIZE(A2), %xmm12	movhpd	 9 * SIZE(A2), %xmm12	mulpd	 %xmm0, %xmm8

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -