zgemv_t_sse2_core2.s
	movapd	-12 * SIZE(BO), %xmm12

	movapd	-14 * SIZE(AO1), %xmm8
	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm2
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm3

	movapd	-10 * SIZE(BO), %xmm13

	subq	$-4 * SIZE, AO1
	subq	$-4 * SIZE, BO
	ALIGN_2

.L46:
	movq	MIN_N, I
	andq	$1, I
	jle	.L47

	movapd	-16 * SIZE(AO1), %xmm8
	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm1
	ALIGN_3

.L47:
	addpd	%xmm2, %xmm0
	addpd	%xmm3, %xmm1

#if (!defined(CONJ) && !defined(XCONJ)) || \
    ( defined(CONJ) && defined(XCONJ))
	pxor	BETA, %xmm0
#else
	pxor	BETA, %xmm1
#endif

	haddpd	%xmm1, %xmm0
	pshufd	$0x4e, %xmm0, %xmm1

	mulpd	ALPHA, %xmm0
	mulpd	ALPHA, %xmm1

	xorpd	BETA, %xmm0
	haddpd	%xmm1, %xmm0

	movsd	0 * SIZE(CO), %xmm8
	movhpd	1 * SIZE(CO), %xmm8

	addpd	%xmm8, %xmm0

	movsd	%xmm0, 0 * SIZE(CO)
	movhpd	%xmm0, 1 * SIZE(CO)
	ALIGN_3

.L99:
	addq	NLDA, A

	addq	$P, IS
	cmpq	M, IS
	jl	.L10

	movq	 0(%rsp), %rbx
	movq	 8(%rsp), %rbp
	movq	16(%rsp), %r12
	movq	24(%rsp), %r13
	movq	32(%rsp), %r14
	movq	40(%rsp), %r15

#ifdef WINDOWS_ABI
	movq	48(%rsp), %rdi
	movq	56(%rsp), %rsi
	movups	 64(%rsp), %xmm6
	movups	 80(%rsp), %xmm7
	movups	 96(%rsp), %xmm8
	movups	112(%rsp), %xmm9
	movups	128(%rsp), %xmm10
	movups	144(%rsp), %xmm11
	movups	160(%rsp), %xmm12
	movups	176(%rsp), %xmm13
	movups	192(%rsp), %xmm14
	movups	208(%rsp), %xmm15
#endif

	addq	$STACKSIZE, %rsp
	ret
	ALIGN_3

.L100:
	movq	N, J
	sarq	$2, J
	jle	.L130
	ALIGN_3

.L121:
	movq	A, AO1
	leaq	(A, LDA, 1), AO2
	leaq	(A, LDA, 4), A

	movq	BUFFER, BO
	addq	$16 * SIZE, BO

	movapd	-16 * SIZE(BO), %xmm12
	movapd	-14 * SIZE(BO), %xmm13

	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3
	pxor	%xmm4, %xmm4
	pxor	%xmm5, %xmm5
	pxor	%xmm6, %xmm6
	pxor	%xmm7, %xmm7

	movq	MIN_N, I
	sarq	$3, I
	jle	.L124
	ALIGN_3

.L122:
	PREFETCH (PREFETCHSIZE + 0) * SIZE(AO1)

	movsd	-16 * SIZE(AO1), %xmm8
	movhpd	-15 * SIZE(AO1), %xmm8
	movsd	-16 * SIZE(AO2), %xmm10
	movhpd	-15 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-16 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-15 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-16 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-15 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11
	movapd	-12 * SIZE(BO), %xmm12

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 0) * SIZE(AO2)

	movsd	-14 * SIZE(AO1), %xmm8
	movhpd	-13 * SIZE(AO1), %xmm8
	movsd	-14 * SIZE(AO2), %xmm10
	movhpd	-13 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-14 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-13 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-14 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-13 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11
	movapd	-10 * SIZE(BO), %xmm13

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 0) * SIZE(AO1, LDA, 2)

	movsd	-12 * SIZE(AO1), %xmm8
	movhpd	-11 * SIZE(AO1), %xmm8
	movsd	-12 * SIZE(AO2), %xmm10
	movhpd	-11 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-12 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-11 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-12 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-11 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11
	movapd	-8 * SIZE(BO), %xmm12

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 0) * SIZE(AO2, LDA, 2)

	movsd	-10 * SIZE(AO1), %xmm8
	movhpd	 -9 * SIZE(AO1), %xmm8
	movsd	-10 * SIZE(AO2), %xmm10
	movhpd	 -9 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-10 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	 -9 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-10 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	 -9 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11
	movapd	-6 * SIZE(BO), %xmm13

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 8) * SIZE(AO1)

	movsd	-8 * SIZE(AO1), %xmm8
	movhpd	-7 * SIZE(AO1), %xmm8
	movsd	-8 * SIZE(AO2), %xmm10
	movhpd	-7 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-8 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-7 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-8 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-7 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11
	movapd	-4 * SIZE(BO), %xmm12

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 8) * SIZE(AO2)

	movsd	-6 * SIZE(AO1), %xmm8
	movhpd	-5 * SIZE(AO1), %xmm8
	movsd	-6 * SIZE(AO2), %xmm10
	movhpd	-5 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-6 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-5 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-6 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-5 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11
	movapd	-2 * SIZE(BO), %xmm13

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 8) * SIZE(AO1, LDA, 2)

	movsd	-4 * SIZE(AO1), %xmm8
	movhpd	-3 * SIZE(AO1), %xmm8
	movsd	-4 * SIZE(AO2), %xmm10
	movhpd	-3 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-4 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-3 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-4 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-3 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm12, %xmm8
	mulpd	%xmm12, %xmm9
	mulpd	%xmm12, %xmm10
	mulpd	%xmm12, %xmm11
	movapd	 0 * SIZE(BO), %xmm12

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	PREFETCH (PREFETCHSIZE + 8) * SIZE(AO2, LDA, 2)

	movsd	-2 * SIZE(AO1), %xmm8
	movhpd	-1 * SIZE(AO1), %xmm8
	movsd	-2 * SIZE(AO2), %xmm10
	movhpd	-1 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11

	addpd	%xmm8, %xmm0
	ADD	%xmm9, %xmm1
	addpd	%xmm10, %xmm2
	ADD	%xmm11, %xmm3

	movsd	-2 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-1 * SIZE(AO1, LDA, 2), %xmm8
	movsd	-2 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-1 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm8, %xmm9
	pshufd	$0x4e, %xmm10, %xmm11

	mulpd	%xmm13, %xmm8
	mulpd	%xmm13, %xmm9
	mulpd	%xmm13, %xmm10
	mulpd	%xmm13, %xmm11
	movapd	 2 * SIZE(BO), %xmm13

	addpd	%xmm8, %xmm4
	ADD	%xmm9, %xmm5
	addpd	%xmm10, %xmm6
	ADD	%xmm11, %xmm7

	subq	$-16 * SIZE, AO1
	subq	$-16 * SIZE, AO2
	subq	$-16 * SIZE, BO

	decq	I
	jg	.L122
	ALIGN_3

.L124:
	movq	MIN_N, I
	andq	$4, I
	jle	.L125

	movsd	-16 * SIZE(AO1), %xmm8
	movhpd	-15 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-16 * SIZE(AO2), %xmm10
	movhpd	-15 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-16 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-15 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-16 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-15 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-12 * SIZE(BO), %xmm12

	movsd	-14 * SIZE(AO1), %xmm8
	movhpd	-13 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-14 * SIZE(AO2), %xmm10
	movhpd	-13 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-14 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-13 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-14 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-13 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-10 * SIZE(BO), %xmm13

	movsd	-12 * SIZE(AO1), %xmm8
	movhpd	-11 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-12 * SIZE(AO2), %xmm10
	movhpd	-11 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-12 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-11 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-12 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-11 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-8 * SIZE(BO), %xmm12

	movsd	-10 * SIZE(AO1), %xmm8
	movhpd	 -9 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-10 * SIZE(AO2), %xmm10
	movhpd	 -9 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-10 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	 -9 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-10 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	 -9 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-6 * SIZE(BO), %xmm13

	subq	$-8 * SIZE, AO1
	subq	$-8 * SIZE, AO2
	subq	$-8 * SIZE, BO
	ALIGN_3

.L125:
	movq	MIN_N, I
	andq	$2, I
	jle	.L126

	movsd	-16 * SIZE(AO1), %xmm8
	movhpd	-15 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-16 * SIZE(AO2), %xmm10
	movhpd	-15 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-16 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-15 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-16 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-15 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-12 * SIZE(BO), %xmm12

	movsd	-14 * SIZE(AO1), %xmm8
	movhpd	-13 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-14 * SIZE(AO2), %xmm10
	movhpd	-13 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-14 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-13 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm13, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm13, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-14 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-13 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm13, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm13, %xmm11
	ADD	%xmm11, %xmm7
	movapd	-10 * SIZE(BO), %xmm13

	subq	$-4 * SIZE, AO1
	subq	$-4 * SIZE, AO2
	subq	$-4 * SIZE, BO
	ALIGN_2

.L126:
	movq	MIN_N, I
	andq	$1, I
	jle	.L127

	movsd	-16 * SIZE(AO1), %xmm8
	movhpd	-15 * SIZE(AO1), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm0
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm1

	movsd	-16 * SIZE(AO2), %xmm10
	movhpd	-15 * SIZE(AO2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm2
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm3

	movsd	-16 * SIZE(AO1, LDA, 2), %xmm8
	movhpd	-15 * SIZE(AO1, LDA, 2), %xmm8

	pshufd	$0x4e, %xmm8, %xmm9
	mulpd	%xmm12, %xmm8
	addpd	%xmm8, %xmm4
	mulpd	%xmm12, %xmm9
	ADD	%xmm9, %xmm5

	movsd	-16 * SIZE(AO2, LDA, 2), %xmm10
	movhpd	-15 * SIZE(AO2, LDA, 2), %xmm10

	pshufd	$0x4e, %xmm10, %xmm11
	mulpd	%xmm12, %xmm10
	addpd	%xmm10, %xmm6
	mulpd	%xmm12, %xmm11
	ADD	%xmm11, %xmm7
	ALIGN_3

.L127:
#if (!defined(CONJ) && !defined(XCONJ)) || \
    ( defined(CONJ) && defined(XCONJ))
	pxor	BETA, %xmm0
	pxor	BETA, %xmm2
	pxor	BETA, %xmm4
	pxor	BETA, %xmm6
#else
	pxor	BETA, %xmm1
	pxor	BETA, %xmm3
	pxor	BETA, %xmm5
	pxor	BETA, %xmm7
#endif

	haddpd	%xmm1, %xmm0
	haddpd	%xmm3, %xmm2
	haddpd	%xmm5, %xmm4
	haddpd	%xmm7, %xmm6

	pshufd	$0x4e, %xmm0, %xmm1
	pshufd	$0x4e, %xmm2, %xmm3
	pshufd	$0x4e, %xmm4, %xmm5
	pshufd	$0x4e, %xmm6, %xmm7
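
Reader's note: the idiom repeated throughout this kernel is a packed complex multiply-accumulate. Each complex element of a matrix column is loaded as a (real, imag) pair, pshufd $0x4e produces the swapped (imag, real) pair, both are multiplied by the packed x element held in %xmm12/%xmm13, and two accumulators per column collect the even and odd products (ADD expands to addpd or subpd depending on the conjugation variant). At the reductions (.L47, .L127), pxor with what appears to be a sign mask in BETA flips one lane, and haddpd collapses each accumulator pair into the real and imaginary parts of the dot product. Below is a minimal plain-C model of that accumulation for one column, not part of the original file: the names (dcomplex, zdot_model, conj_a) are illustrative only, the ADD/BETA/XCONJ variants are folded into a single flag, and the ALPHA scaling and the update of y at CO are omitted.

	/* Hedged C sketch of the per-column accumulation modeled above. */
	#include <stddef.h>

	typedef struct { double re, im; } dcomplex;

	static dcomplex zdot_model(size_t n, const dcomplex *a,
	                           const dcomplex *x, int conj_a)
	{
	    double even[2] = { 0.0, 0.0 };  /* e.g. %xmm0: (sum ar*xr, sum ai*xi) */
	    double odd[2]  = { 0.0, 0.0 };  /* e.g. %xmm1: (sum ai*xr, sum ar*xi) */

	    for (size_t k = 0; k < n; k++) {
	        even[0] += a[k].re * x[k].re;  /* mulpd %xmm12, %xmm8 ; addpd */
	        even[1] += a[k].im * x[k].im;
	        odd[0]  += a[k].im * x[k].re;  /* pshufd $0x4e ; mulpd ; ADD  */
	        odd[1]  += a[k].re * x[k].im;
	    }

	    /* pxor BETA flips one lane's sign; haddpd sums the two lanes. */
	    dcomplex d;
	    if (conj_a) {                      /* conj(a) . x */
	        d.re = even[0] + even[1];
	        d.im = odd[1] - odd[0];
	    } else {                           /* a . x */
	        d.re = even[0] - even[1];
	        d.im = odd[0] + odd[1];
	    }
	    return d;
	}

Deferring the lane sign flip and the horizontal adds until after the loop is what lets the inner loop run on nothing but mulpd/addpd, which is the point of the layout above.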