📄 zaxpy_sse2_core2.s
/* Tail of the unrolled .L41 main loop (aligned-Y path: movapd loads/stores require
 * 16-byte alignment).  %xmm14 and %xmm15 hold alpha arranged so that
 *     y += %xmm14 * dup(x_re) + %xmm15 * dup(x_im)
 * performs the complex multiply-add without per-element shuffles of y. */
        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        movapd  %xmm8,  (YY)
        addq    INCY, YY
        movapd  %xmm9,  (YY)
        addq    INCY, YY
        movapd  %xmm10, (YY)
        addq    INCY, YY
        movapd  %xmm11, (YY)
        addq    INCY, YY

        movapd  (Y), %xmm8
        addq    INCY, Y
        movapd  (Y), %xmm9
        addq    INCY, Y
        movapd  (Y), %xmm10
        addq    INCY, Y
        movapd  (Y), %xmm11
        addq    INCY, Y

        decq    %rax
        jg      .L41
        ALIGN_3

.L42:   /* finish the last 8 elements left in flight by the software-pipelined loop */
        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        movapd  %xmm8,  (YY)
        addq    INCY, YY
        movapd  %xmm9,  (YY)
        addq    INCY, YY
        movapd  %xmm10, (YY)
        addq    INCY, YY
        movapd  %xmm11, (YY)
        addq    INCY, YY

        movapd  (Y), %xmm8
        addq    INCY, Y
        movapd  (Y), %xmm9
        addq    INCY, Y
        movapd  (Y), %xmm10
        addq    INCY, Y
        movapd  (Y), %xmm11
        addq    INCY, Y

        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        movapd  %xmm8,  (YY)
        addq    INCY, YY
        movapd  %xmm9,  (YY)
        addq    INCY, YY
        movapd  %xmm10, (YY)
        addq    INCY, YY
        movapd  %xmm11, (YY)
        addq    INCY, YY
        ALIGN_3

.L45:   /* remaining 4 complex elements (M & 4) */
        movq    M, %rax
        andq    $4, %rax
        jle     .L46

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        movapd  (Y), %xmm8
        addq    INCY, Y
        movapd  (Y), %xmm9
        addq    INCY, Y
        movapd  (Y), %xmm10
        addq    INCY, Y
        movapd  (Y), %xmm11
        addq    INCY, Y

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        movapd  %xmm8,  (YY)
        addq    INCY, YY
        movapd  %xmm9,  (YY)
        addq    INCY, YY
        movapd  %xmm10, (YY)
        addq    INCY, YY
        movapd  %xmm11, (YY)
        addq    INCY, YY
        ALIGN_3

.L46:   /* remaining 2 complex elements (M & 2) */
        movq    M, %rax
        andq    $2, %rax
        jle     .L47

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X

        movapd  (Y), %xmm8
        addq    INCY, Y
        movapd  (Y), %xmm9
        addq    INCY, Y

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm15, %xmm1
        mulpd   %xmm15, %xmm3

        addpd   %xmm0, %xmm8
        addpd   %xmm2, %xmm9
        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9

        movapd  %xmm8, (YY)
        addq    INCY, YY
        movapd  %xmm9, (YY)
        addq    INCY, YY
        ALIGN_3

.L47:   /* last complex element (M & 1) */
        movq    M, %rax
        andq    $1, %rax
        jle     .L999

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)

        movapd  (Y), %xmm8

        mulpd   %xmm14, %xmm0
        mulpd   %xmm15, %xmm1
        addpd   %xmm0, %xmm8
        addpd   %xmm1, %xmm8

        movapd  %xmm8, (YY)
        jmp     .L999
        ALIGN_3

.L50:   /* movsd/movhpd path: Y accesses without the 16-byte alignment requirement,
         * unrolled by 8 complex elements */
        movq    Y, YY
        movq    M, %rax
        sarq    $3, %rax
        jle     .L55

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm10
        movhpd  1 * SIZE(Y), %xmm10
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm11
        movhpd  1 * SIZE(Y), %xmm11
        addq    INCY, Y

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        decq    %rax
        jle     .L52
        ALIGN_3

.L51:   /* main unrolled loop of the unaligned-Y path */
        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        movsd   %xmm8,  0 * SIZE(YY)
        movhpd  %xmm8,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9,  0 * SIZE(YY)
        movhpd  %xmm9,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm10, 0 * SIZE(YY)
        movhpd  %xmm10, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm11, 0 * SIZE(YY)
        movhpd  %xmm11, 1 * SIZE(YY)
        addq    INCY, YY

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm10
        movhpd  1 * SIZE(Y), %xmm10
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm11
        movhpd  1 * SIZE(Y), %xmm11
        addq    INCY, Y

        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        movsd   %xmm8,  0 * SIZE(YY)
        movhpd  %xmm8,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9,  0 * SIZE(YY)
        movhpd  %xmm9,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm10, 0 * SIZE(YY)
        movhpd  %xmm10, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm11, 0 * SIZE(YY)
        movhpd  %xmm11, 1 * SIZE(YY)
        addq    INCY, YY

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm10
        movhpd  1 * SIZE(Y), %xmm10
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm11
        movhpd  1 * SIZE(Y), %xmm11
        addq    INCY, Y

        decq    %rax
        jg      .L51
        ALIGN_3

.L52:   /* finish the last 8 elements left in flight by the unaligned-Y loop */
        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        movsd   %xmm8,  0 * SIZE(YY)
        movhpd  %xmm8,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9,  0 * SIZE(YY)
        movhpd  %xmm9,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm10, 0 * SIZE(YY)
        movhpd  %xmm10, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm11, 0 * SIZE(YY)
        movhpd  %xmm11, 1 * SIZE(YY)
        addq    INCY, YY

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm10
        movhpd  1 * SIZE(Y), %xmm10
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm11
        movhpd  1 * SIZE(Y), %xmm11
        addq    INCY, Y

        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        movsd   %xmm8,  0 * SIZE(YY)
        movhpd  %xmm8,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9,  0 * SIZE(YY)
        movhpd  %xmm9,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm10, 0 * SIZE(YY)
        movhpd  %xmm10, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm11, 0 * SIZE(YY)
        movhpd  %xmm11, 1 * SIZE(YY)
        addq    INCY, YY
        ALIGN_3

.L55:   /* remaining 4 complex elements (M & 4) */
        movq    M, %rax
        andq    $4, %rax
        jle     .L56

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm4)
        MOVDDUP( 1 * SIZE, X, %xmm5)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm6)
        MOVDDUP( 1 * SIZE, X, %xmm7)
        addq    INCX, X

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm10
        movhpd  1 * SIZE(Y), %xmm10
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm11
        movhpd  1 * SIZE(Y), %xmm11
        addq    INCY, Y

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm14, %xmm4
        mulpd   %xmm14, %xmm6

        addpd   %xmm0, %xmm8
        mulpd   %xmm15, %xmm1
        addpd   %xmm2, %xmm9
        mulpd   %xmm15, %xmm3
        addpd   %xmm4, %xmm10
        mulpd   %xmm15, %xmm5
        addpd   %xmm6, %xmm11
        mulpd   %xmm15, %xmm7

        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9
        addpd   %xmm5, %xmm10
        addpd   %xmm7, %xmm11

        movsd   %xmm8,  0 * SIZE(YY)
        movhpd  %xmm8,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9,  0 * SIZE(YY)
        movhpd  %xmm9,  1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm10, 0 * SIZE(YY)
        movhpd  %xmm10, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm11, 0 * SIZE(YY)
        movhpd  %xmm11, 1 * SIZE(YY)
        addq    INCY, YY
        ALIGN_3

.L56:   /* remaining 2 complex elements (M & 2) */
        movq    M, %rax
        andq    $2, %rax
        jle     .L57

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)
        addq    INCX, X
        MOVDDUP( 0 * SIZE, X, %xmm2)
        MOVDDUP( 1 * SIZE, X, %xmm3)
        addq    INCX, X

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8
        addq    INCY, Y
        movsd   0 * SIZE(Y), %xmm9
        movhpd  1 * SIZE(Y), %xmm9
        addq    INCY, Y

        mulpd   %xmm14, %xmm0
        mulpd   %xmm14, %xmm2
        mulpd   %xmm15, %xmm1
        mulpd   %xmm15, %xmm3

        addpd   %xmm0, %xmm8
        addpd   %xmm2, %xmm9
        addpd   %xmm1, %xmm8
        addpd   %xmm3, %xmm9

        movsd   %xmm8, 0 * SIZE(YY)
        movhpd  %xmm8, 1 * SIZE(YY)
        addq    INCY, YY
        movsd   %xmm9, 0 * SIZE(YY)
        movhpd  %xmm9, 1 * SIZE(YY)
        addq    INCY, YY
        ALIGN_3

.L57:   /* last complex element (M & 1) */
        movq    M, %rax
        andq    $1, %rax
        jle     .L999

        MOVDDUP( 0 * SIZE, X, %xmm0)
        MOVDDUP( 1 * SIZE, X, %xmm1)

        movsd   0 * SIZE(Y), %xmm8
        movhpd  1 * SIZE(Y), %xmm8

        mulpd   %xmm14, %xmm0
        mulpd   %xmm15, %xmm1
        addpd   %xmm0, %xmm8
        addpd   %xmm1, %xmm8

        movsd   %xmm8, 0 * SIZE(YY)
        movhpd  %xmm8, 1 * SIZE(YY)
        ALIGN_3

.L999:  /* return 0 */
        xorq    %rax, %rax
        RESTOREREGISTERS
        ret
        EPILOGUE
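For reference, the kernel above implements the double-precision complex AXPY, y := alpha * x + y, over M elements with strides INCX and INCY. %xmm14 and %xmm15 hold alpha pre-arranged so that each element costs two MOVDDUP broadcasts of x, two mulpd and two addpd around the load/store of y. The scalar sketch below is illustrative only: the function name, argument order, and element-count strides are assumptions made for this sketch, not the kernel's actual ABI, and it shows the non-conjugated variant.

#include <stddef.h>

/* Hypothetical scalar reference for ZAXPY: y[k] += alpha * x[k] (double complex). */
typedef struct { double re, im; } dcomplex;

static void zaxpy_ref(size_t m, dcomplex alpha,
                      const dcomplex *x, ptrdiff_t incx,
                      dcomplex *y, ptrdiff_t incy)
{
    for (size_t k = 0; k < m; k++) {
        /* complex multiply-accumulate: the scalar analogue of the mulpd/addpd pattern above */
        y->re += alpha.re * x->re - alpha.im * x->im;
        y->im += alpha.re * x->im + alpha.im * x->re;
        x += incx;   /* strides here count elements; the assembly adds pre-scaled pointer offsets */
        y += incy;
    }
}

Loading alpha into %xmm14/%xmm15 once, outside the loops, keeps the inner loops free of per-element shuffles of y; the unrolled loops simply interleave four such multiply-accumulates with the pointer updates.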