📄 zscal_sse2.s
字号:
/*
 * NOTE(review): this listing had lost its line breaks.  It is restored here
 * to one statement per line; the instruction stream, operands and ordering
 * are unchanged.
 *
 * This fragment starts inside a block whose beginning is not visible here.
 * X, XX, M, I, INCX, SIZE, MOVDDUP, ALIGN_3/ALIGN_4, RESTOREREGISTERS and
 * EPILOGUE are defined outside this fragment.
 *
 * Pattern used by every block below, per element:
 *     a   = MOVDDUP(0 * SIZE, ptr)      first value of the pair
 *     b   = MOVDDUP(1 * SIZE, ptr)      second value of the pair
 *     a  *= %xmm14
 *     b  *= %xmm15
 *     a  += b                           result pair, stored back in place
 * Presumably MOVDDUP broadcasts one double into both lanes, and %xmm14 /
 * %xmm15 hold the scale factor arranged so that the sum is the complex
 * product -- TODO confirm against the setup code above this fragment.
 *
 * X  is the load pointer, XX the store pointer in the strided paths.
 * INCX is added directly to both pointers and compared with 2 * SIZE.
 * M  is tested for its low three bits to select the 4/2/1 element tails.
 * I  is the unrolled-loop counter (M >> 3).
 */

/* ---- tail of a block that begins before this fragment ------------------ */
/* Stores here use movapd, which requires a 16-byte aligned address.        */
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movapd	%xmm0, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm2, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm4, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm6, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm8, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm9, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm10, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm11, 0 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

/* Strided tail, movapd stores: handle the remaining (M & 7) elements. */
.L125:
	testq	$7, M			/* nothing left over -> done */
	je	.L999

	testq	$4, M			/* four elements */
	je	.L126

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm2)
	MOVDDUP(1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm4)
	MOVDDUP(1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm6)
	MOVDDUP(1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2
	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	movapd	%xmm0, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm2, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm4, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm6, 0 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

.L126:
	testq	$2, M			/* two elements */
	je	.L127

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm2)
	MOVDDUP(1 * SIZE, X, %xmm3)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	movapd	%xmm0, 0 * SIZE(XX)
	addq	INCX, XX
	movapd	%xmm2, 0 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

.L127:
	testq	$1, M			/* last single element */
	je	.L999

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1

	addpd	%xmm1, %xmm0

	movapd	%xmm0, 0 * SIZE(XX)
	jmp	.L999
	ALIGN_3

/* Unaligned */
/* From here on every result is stored as two 8-byte halves               */
/* (movlpd = low lane, movhpd = high lane) instead of one movapd.          */

.L200:
	cmpq	$2 * SIZE, INCX		/* INCX != 2 * SIZE -> strided path */
	jne	.L220

/* Contiguous path: 8 elements (16 values) per loop iteration, in place.  */
.L210:
	movq	M, I	# rcx = n
	sarq	$3, I			/* I = M >> 3 */
	jle	.L215			/* fewer than 8 elements: tails only */

	/* Preload the first four elements for the pipelined loop. */
	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	MOVDDUP(2 * SIZE, X, %xmm2)
	MOVDDUP(3 * SIZE, X, %xmm3)
	MOVDDUP(4 * SIZE, X, %xmm4)
	MOVDDUP(5 * SIZE, X, %xmm5)
	MOVDDUP(6 * SIZE, X, %xmm6)
	MOVDDUP(7 * SIZE, X, %xmm7)

	decq	I
	jle	.L212			/* only one group: go to the drain block */
	ALIGN_4

/* Steady state: compute elements 0-3, load and compute 4-7, store 0-3,   */
/* preload the next group's 0-3 (offsets 16-23), store 4-7, advance X.    */
.L211:
	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	/* xmm1/xmm3 are free after the adds above and are reused as temps. */
	MOVDDUP( 8 * SIZE, X, %xmm8)
	MOVDDUP( 9 * SIZE, X, %xmm1)
	MOVDDUP(10 * SIZE, X, %xmm9)
	MOVDDUP(11 * SIZE, X, %xmm3)

	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	/* likewise xmm5/xmm7 */
	MOVDDUP(12 * SIZE, X, %xmm10)
	MOVDDUP(13 * SIZE, X, %xmm5)
	MOVDDUP(14 * SIZE, X, %xmm11)
	MOVDDUP(15 * SIZE, X, %xmm7)

	mulpd	%xmm14, %xmm8
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm9
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm10
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movlpd	%xmm0, 0 * SIZE(X)
	movhpd	%xmm0, 1 * SIZE(X)
	movlpd	%xmm2, 2 * SIZE(X)
	movhpd	%xmm2, 3 * SIZE(X)
	movlpd	%xmm4, 4 * SIZE(X)
	movhpd	%xmm4, 5 * SIZE(X)
	movlpd	%xmm6, 6 * SIZE(X)
	movhpd	%xmm6, 7 * SIZE(X)

	/* Preload the first half of the next group. */
	MOVDDUP(16 * SIZE, X, %xmm0)
	MOVDDUP(17 * SIZE, X, %xmm1)
	MOVDDUP(18 * SIZE, X, %xmm2)
	MOVDDUP(19 * SIZE, X, %xmm3)
	MOVDDUP(20 * SIZE, X, %xmm4)
	MOVDDUP(21 * SIZE, X, %xmm5)
	MOVDDUP(22 * SIZE, X, %xmm6)
	MOVDDUP(23 * SIZE, X, %xmm7)

	movlpd	%xmm8, 8 * SIZE(X)
	movhpd	%xmm8, 9 * SIZE(X)
	movlpd	%xmm9, 10 * SIZE(X)
	movhpd	%xmm9, 11 * SIZE(X)
	movlpd	%xmm10, 12 * SIZE(X)
	movhpd	%xmm10, 13 * SIZE(X)
	movlpd	%xmm11, 14 * SIZE(X)
	movhpd	%xmm11, 15 * SIZE(X)

	addq	$16 * SIZE, X
	decq	I
	jg	.L211
	ALIGN_4

/* Drain: same as .L211 but without preloading a following group. */
.L212:
	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	MOVDDUP( 8 * SIZE, X, %xmm8)
	MOVDDUP( 9 * SIZE, X, %xmm1)
	MOVDDUP(10 * SIZE, X, %xmm9)
	MOVDDUP(11 * SIZE, X, %xmm3)

	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	MOVDDUP(12 * SIZE, X, %xmm10)
	MOVDDUP(13 * SIZE, X, %xmm5)
	MOVDDUP(14 * SIZE, X, %xmm11)
	MOVDDUP(15 * SIZE, X, %xmm7)

	mulpd	%xmm14, %xmm8
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm9
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm10
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movlpd	%xmm0, 0 * SIZE(X)
	movhpd	%xmm0, 1 * SIZE(X)
	movlpd	%xmm2, 2 * SIZE(X)
	movhpd	%xmm2, 3 * SIZE(X)
	movlpd	%xmm4, 4 * SIZE(X)
	movhpd	%xmm4, 5 * SIZE(X)
	movlpd	%xmm6, 6 * SIZE(X)
	movhpd	%xmm6, 7 * SIZE(X)
	movlpd	%xmm8, 8 * SIZE(X)
	movhpd	%xmm8, 9 * SIZE(X)
	movlpd	%xmm9, 10 * SIZE(X)
	movhpd	%xmm9, 11 * SIZE(X)
	movlpd	%xmm10, 12 * SIZE(X)
	movhpd	%xmm10, 13 * SIZE(X)
	movlpd	%xmm11, 14 * SIZE(X)
	movhpd	%xmm11, 15 * SIZE(X)

	addq	$16 * SIZE, X
	ALIGN_3

/* Contiguous tail: remaining (M & 7) elements. */
.L215:
	testq	$7, M
	je	.L999

	testq	$4, M			/* four elements */
	je	.L216

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	MOVDDUP(2 * SIZE, X, %xmm2)
	MOVDDUP(3 * SIZE, X, %xmm3)
	MOVDDUP(4 * SIZE, X, %xmm4)
	MOVDDUP(5 * SIZE, X, %xmm5)
	MOVDDUP(6 * SIZE, X, %xmm6)
	MOVDDUP(7 * SIZE, X, %xmm7)

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2
	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	movlpd	%xmm0, 0 * SIZE(X)
	movhpd	%xmm0, 1 * SIZE(X)
	movlpd	%xmm2, 2 * SIZE(X)
	movhpd	%xmm2, 3 * SIZE(X)
	movlpd	%xmm4, 4 * SIZE(X)
	movhpd	%xmm4, 5 * SIZE(X)
	movlpd	%xmm6, 6 * SIZE(X)
	movhpd	%xmm6, 7 * SIZE(X)

	addq	$8 * SIZE, X
	ALIGN_3

.L216:
	testq	$2, M			/* two elements */
	je	.L217

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	MOVDDUP(2 * SIZE, X, %xmm2)
	MOVDDUP(3 * SIZE, X, %xmm3)

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	movlpd	%xmm0, 0 * SIZE(X)
	movhpd	%xmm0, 1 * SIZE(X)
	movlpd	%xmm2, 2 * SIZE(X)
	movhpd	%xmm2, 3 * SIZE(X)

	addq	$4 * SIZE, X
	ALIGN_3

.L217:
	testq	$1, M			/* last single element */
	je	.L999

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1

	addpd	%xmm1, %xmm0

	movlpd	%xmm0, 0 * SIZE(X)
	movhpd	%xmm0, 1 * SIZE(X)
	jmp	.L999
	ALIGN_3

/* Strided path: X walks ahead as the load pointer, XX trails as the      */
/* store pointer; both advance by INCX per element.                       */
.L220:
	movq	X, XX

	movq	M, I	# rcx = n
	sarq	$3, I			/* I = M >> 3 */
	jle	.L225			/* fewer than 8 elements: tails only */

	/* Preload the first four elements for the pipelined loop. */
	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm2)
	MOVDDUP(1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm4)
	MOVDDUP(1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm6)
	MOVDDUP(1 * SIZE, X, %xmm7)
	addq	INCX, X

	decq	I
	jle	.L222			/* only one group: go to the drain block */
	ALIGN_4

/* Steady state, 8 elements per iteration (same schedule as .L211). */
.L221:
	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	MOVDDUP( 0 * SIZE, X, %xmm8)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm9)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X

	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	MOVDDUP( 0 * SIZE, X, %xmm10)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm11)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm8
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm9
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm10
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movlpd	%xmm0, 0 * SIZE(XX)
	movhpd	%xmm0, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm2, 0 * SIZE(XX)
	movhpd	%xmm2, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm4, 0 * SIZE(XX)
	movhpd	%xmm4, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm6, 0 * SIZE(XX)
	movhpd	%xmm6, 1 * SIZE(XX)
	addq	INCX, XX

	/* Preload the first half of the next group. */
	MOVDDUP( 0 * SIZE, X, %xmm0)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm2)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm4)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm6)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	movlpd	%xmm8, 0 * SIZE(XX)
	movhpd	%xmm8, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm9, 0 * SIZE(XX)
	movhpd	%xmm9, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm10, 0 * SIZE(XX)
	movhpd	%xmm10, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm11, 0 * SIZE(XX)
	movhpd	%xmm11, 1 * SIZE(XX)
	addq	INCX, XX

	decq	I
	jg	.L221
	ALIGN_4

/* Drain: same as .L221 but without preloading a following group. */
.L222:
	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	MOVDDUP( 0 * SIZE, X, %xmm8)
	MOVDDUP( 1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm9)
	MOVDDUP( 1 * SIZE, X, %xmm3)
	addq	INCX, X

	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	MOVDDUP( 0 * SIZE, X, %xmm10)
	MOVDDUP( 1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP( 0 * SIZE, X, %xmm11)
	MOVDDUP( 1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm8
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm9
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm10
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm11
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm8
	addpd	%xmm3, %xmm9
	addpd	%xmm5, %xmm10
	addpd	%xmm7, %xmm11

	movlpd	%xmm0, 0 * SIZE(XX)
	movhpd	%xmm0, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm2, 0 * SIZE(XX)
	movhpd	%xmm2, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm4, 0 * SIZE(XX)
	movhpd	%xmm4, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm6, 0 * SIZE(XX)
	movhpd	%xmm6, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm8, 0 * SIZE(XX)
	movhpd	%xmm8, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm9, 0 * SIZE(XX)
	movhpd	%xmm9, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm10, 0 * SIZE(XX)
	movhpd	%xmm10, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm11, 0 * SIZE(XX)
	movhpd	%xmm11, 1 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

/* Strided tail: remaining (M & 7) elements. */
.L225:
	testq	$7, M
	je	.L999

	testq	$4, M			/* four elements */
	je	.L226

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm2)
	MOVDDUP(1 * SIZE, X, %xmm3)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm4)
	MOVDDUP(1 * SIZE, X, %xmm5)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm6)
	MOVDDUP(1 * SIZE, X, %xmm7)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3
	mulpd	%xmm14, %xmm4
	mulpd	%xmm15, %xmm5
	mulpd	%xmm14, %xmm6
	mulpd	%xmm15, %xmm7

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2
	addpd	%xmm5, %xmm4
	addpd	%xmm7, %xmm6

	movlpd	%xmm0, 0 * SIZE(XX)
	movhpd	%xmm0, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm2, 0 * SIZE(XX)
	movhpd	%xmm2, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm4, 0 * SIZE(XX)
	movhpd	%xmm4, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm6, 0 * SIZE(XX)
	movhpd	%xmm6, 1 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

.L226:
	testq	$2, M			/* two elements */
	je	.L227

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)
	addq	INCX, X
	MOVDDUP(0 * SIZE, X, %xmm2)
	MOVDDUP(1 * SIZE, X, %xmm3)
	addq	INCX, X

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1
	mulpd	%xmm14, %xmm2
	mulpd	%xmm15, %xmm3

	addpd	%xmm1, %xmm0
	addpd	%xmm3, %xmm2

	movlpd	%xmm0, 0 * SIZE(XX)
	movhpd	%xmm0, 1 * SIZE(XX)
	addq	INCX, XX
	movlpd	%xmm2, 0 * SIZE(XX)
	movhpd	%xmm2, 1 * SIZE(XX)
	addq	INCX, XX
	ALIGN_3

.L227:
	testq	$1, M			/* last single element */
	je	.L999

	MOVDDUP(0 * SIZE, X, %xmm0)
	MOVDDUP(1 * SIZE, X, %xmm1)

	mulpd	%xmm14, %xmm0
	mulpd	%xmm15, %xmm1

	addpd	%xmm1, %xmm0

	movlpd	%xmm0, 0 * SIZE(XX)
	movhpd	%xmm0, 1 * SIZE(XX)
	ALIGN_3

/* Common exit: clear %rax, then run the register-restore and epilogue    */
/* macros (both defined outside this fragment).                            */
.L999:
	xorq	%rax, %rax

	RESTOREREGISTERS

	ret

	EPILOGUE
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -