nb_kernel211_ia32_sse2.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 1,984 行 · 第 1/5 页

S
1,984
字号
        ## ------------------------------------------------------------------
        ## nb_kernel211_ia32_sse2 -- inner-loop fragment (AT&T syntax, SSE2).
        ##
        ## This span is NOT a whole function: it begins in the middle of the
        ## packed-double iteration (two j particles per pass, one per lane)
        ## and ends part-way through the single-j tail (nb211_dosingle).
        ##
        ## Visible structure:
        ##   1. finish rsq for O, then compute rsq for H1 and H2
        ##   2. krsq = krf*rsq for the three sites
        ##   3. rinv = 1/sqrt(rsq): rsqrtps seed + two refinement steps
        ##   4. O site: c6/c12 term plus reaction-field term
        ##      H1/H2 sites: reaction-field term only
        ##   5. accumulate i-site forces, subtract summed force from faction[j]
        ##   6. loop control: unroll_loop / checksingle / dosingle
        ##
        ## On entry to this span xmm0-xmm2 hold the j coordinates, and xmm4 /
        ## xmm6 hold partial sums of the O distance; eax / ebx index the two j
        ## particles.  All of these are set up before this span -- presumably
        ## eax/ebx are 3*jnr as in the single-j path below; confirm upstream.
        ## nb211_three / half / two / six / twelve are presumed to hold the
        ## constants their names suggest; their definitions are not visible.
        ## ------------------------------------------------------------------
        addpd    %xmm6,%xmm4            ## last term of the O distance sum
        movapd   %xmm4,%xmm7
        ## rsqO in xmm7

        ## move ixH1-izH1 to xmm4-xmm6
        movapd   nb211_ixH1(%esp),%xmm4
        movapd   nb211_iyH1(%esp),%xmm5
        movapd   nb211_izH1(%esp),%xmm6

        ## calc dr
        subpd    %xmm0,%xmm4
        subpd    %xmm1,%xmm5
        subpd    %xmm2,%xmm6

        ## store dr
        movapd   %xmm4,nb211_dxH1(%esp)
        movapd   %xmm5,nb211_dyH1(%esp)
        movapd   %xmm6,nb211_dzH1(%esp)
        ## square it
        mulpd    %xmm4,%xmm4
        mulpd    %xmm5,%xmm5
        mulpd    %xmm6,%xmm6
        addpd    %xmm5,%xmm6
        addpd    %xmm4,%xmm6
        ## rsqH1 in xmm6

        ## move ixH2-izH2 to xmm3-xmm5
        movapd   nb211_ixH2(%esp),%xmm3
        movapd   nb211_iyH2(%esp),%xmm4
        movapd   nb211_izH2(%esp),%xmm5

        ## calc dr
        subpd    %xmm0,%xmm3
        subpd    %xmm1,%xmm4
        subpd    %xmm2,%xmm5

        ## store dr
        movapd   %xmm3,nb211_dxH2(%esp)
        movapd   %xmm4,nb211_dyH2(%esp)
        movapd   %xmm5,nb211_dzH2(%esp)
        ## square it
        mulpd    %xmm3,%xmm3
        mulpd    %xmm4,%xmm4
        mulpd    %xmm5,%xmm5
        addpd    %xmm4,%xmm5
        addpd    %xmm3,%xmm5
        ## rsqH2 in xmm5, rsqH1 in xmm6, rsqO in xmm7

        ## krsq = krf * rsq for each site, kept on the stack for later
        movapd   %xmm5,%xmm0
        movapd   %xmm6,%xmm1
        movapd   %xmm7,%xmm2
        mulpd    nb211_krf(%esp),%xmm0
        mulpd    nb211_krf(%esp),%xmm1
        mulpd    nb211_krf(%esp),%xmm2
        movapd   %xmm0,nb211_krsqH2(%esp)
        movapd   %xmm1,nb211_krsqH1(%esp)
        movapd   %xmm2,nb211_krsqO(%esp)

        ## start with rsqO - put seed in xmm2
        ## (single-precision rsqrtps estimate, widened back to double, then
        ##  refined twice with lu' = half * lu * (three - rsq*lu*lu))
        cvtpd2ps %xmm7,%xmm2
        rsqrtps  %xmm2,%xmm2
        cvtps2pd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulpd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulpd    %xmm7,%xmm2            ## rsq*lu*lu
        subpd    %xmm2,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*(3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulpd    %xmm4,%xmm4            ## lu*lu
        mulpd    %xmm4,%xmm7            ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subpd    %xmm7,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*( 3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## rinv
        movapd   %xmm4,%xmm7            ## rinvO in xmm7

        ## rsqH1 - seed in xmm2
        cvtpd2ps %xmm6,%xmm2
        rsqrtps  %xmm2,%xmm2
        cvtps2pd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulpd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulpd    %xmm6,%xmm2            ## rsq*lu*lu
        subpd    %xmm2,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*(3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulpd    %xmm4,%xmm4            ## lu*lu
        mulpd    %xmm4,%xmm6            ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subpd    %xmm6,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*( 3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## rinv
        movapd   %xmm4,%xmm6            ## rinvH1 in xmm6

        ## rsqH2 - seed in xmm2
        cvtpd2ps %xmm5,%xmm2
        rsqrtps  %xmm2,%xmm2
        cvtps2pd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulpd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulpd    %xmm5,%xmm2            ## rsq*lu*lu
        subpd    %xmm2,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*(3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulpd    %xmm4,%xmm4            ## lu*lu
        mulpd    %xmm4,%xmm5            ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subpd    %xmm5,%xmm4            ## 3-rsq*lu*lu
        mulpd    %xmm3,%xmm4            ## lu*( 3-rsq*lu*lu)
        mulpd    nb211_half(%esp),%xmm4 ## rinv
        movapd   %xmm4,%xmm5            ## rinvH2 in xmm5

        ## do O interactions
        movapd   %xmm7,%xmm4
        mulpd    %xmm4,%xmm4            ## xmm7=rinv, xmm4=rinvsq
        movapd   %xmm4,%xmm1
        mulpd    %xmm4,%xmm1
        mulpd    %xmm4,%xmm1            ## xmm1=rinvsix
        movapd   %xmm1,%xmm2
        mulpd    %xmm2,%xmm2            ## xmm2=rinvtwelve
        mulpd    nb211_c6(%esp),%xmm1
        mulpd    nb211_c12(%esp),%xmm2
        movapd   %xmm2,%xmm3
        subpd    %xmm1,%xmm3            ## Vvdw=Vvdw12-Vvdw6
        addpd    nb211_Vvdwtot(%esp),%xmm3
        mulpd    nb211_six(%esp),%xmm1
        mulpd    nb211_twelve(%esp),%xmm2
        subpd    %xmm1,%xmm2            ## nb part of fs

        movapd   %xmm7,%xmm0
        movapd   nb211_krsqO(%esp),%xmm1
        addpd    %xmm1,%xmm0
        mulpd    nb211_two(%esp),%xmm1
        subpd    nb211_crf(%esp),%xmm0  ## xmm0=rinv+ krsq-crf
        subpd    %xmm1,%xmm7            ## xmm7=rinv-2*krsq
        mulpd    nb211_qqO(%esp),%xmm0
        mulpd    nb211_qqO(%esp),%xmm7
        addpd    %xmm7,%xmm2
        mulpd    %xmm2,%xmm4            ## total fsO in xmm4

        addpd    nb211_vctot(%esp),%xmm0
        movapd   %xmm3,nb211_Vvdwtot(%esp)
        movapd   %xmm0,nb211_vctot(%esp)

        ## force vector = fsO * dr
        movapd   nb211_dxO(%esp),%xmm0
        movapd   nb211_dyO(%esp),%xmm1
        movapd   nb211_dzO(%esp),%xmm2
        mulpd    %xmm4,%xmm0
        mulpd    %xmm4,%xmm1
        mulpd    %xmm4,%xmm2

        ## update O forces
        movapd   nb211_fixO(%esp),%xmm3
        movapd   nb211_fiyO(%esp),%xmm4
        movapd   nb211_fizO(%esp),%xmm7
        addpd    %xmm0,%xmm3
        addpd    %xmm1,%xmm4
        addpd    %xmm2,%xmm7
        movapd   %xmm3,nb211_fixO(%esp)
        movapd   %xmm4,nb211_fiyO(%esp)
        movapd   %xmm7,nb211_fizO(%esp)
        ## update j forces with water O
        movapd   %xmm0,nb211_fjx(%esp)
        movapd   %xmm1,nb211_fjy(%esp)
        movapd   %xmm2,nb211_fjz(%esp)

        ## H1 interactions
        movapd   %xmm6,%xmm4
        mulpd    %xmm4,%xmm4            ## xmm6=rinv, xmm4=rinvsq
        movapd   %xmm6,%xmm7
        movapd   nb211_krsqH1(%esp),%xmm0
        addpd    %xmm0,%xmm6            ## xmm6=rinv+ krsq
        mulpd    nb211_two(%esp),%xmm0
        subpd    nb211_crf(%esp),%xmm6
        subpd    %xmm0,%xmm7            ## xmm7=rinv-2*krsq
        mulpd    nb211_qqH(%esp),%xmm6  ## vcoul
        mulpd    nb211_qqH(%esp),%xmm7
        mulpd    %xmm7,%xmm4            ## total fsH1 in xmm4
        addpd    nb211_vctot(%esp),%xmm6

        movapd   nb211_dxH1(%esp),%xmm0
        movapd   nb211_dyH1(%esp),%xmm1
        movapd   nb211_dzH1(%esp),%xmm2
        movapd   %xmm6,nb211_vctot(%esp)
        mulpd    %xmm4,%xmm0
        mulpd    %xmm4,%xmm1
        mulpd    %xmm4,%xmm2

        ## update H1 forces
        movapd   nb211_fixH1(%esp),%xmm3
        movapd   nb211_fiyH1(%esp),%xmm4
        movapd   nb211_fizH1(%esp),%xmm7
        addpd    %xmm0,%xmm3
        addpd    %xmm1,%xmm4
        addpd    %xmm2,%xmm7
        movapd   %xmm3,nb211_fixH1(%esp)
        movapd   %xmm4,nb211_fiyH1(%esp)
        movapd   %xmm7,nb211_fizH1(%esp)
        ## update j forces with water H1
        addpd    nb211_fjx(%esp),%xmm0
        addpd    nb211_fjy(%esp),%xmm1
        addpd    nb211_fjz(%esp),%xmm2
        movapd   %xmm0,nb211_fjx(%esp)
        movapd   %xmm1,nb211_fjy(%esp)
        movapd   %xmm2,nb211_fjz(%esp)

        ## H2 interactions
        movapd   %xmm5,%xmm4
        mulpd    %xmm4,%xmm4            ## xmm5=rinv, xmm4=rinvsq
        movapd   %xmm5,%xmm7
        movapd   nb211_krsqH2(%esp),%xmm0
        addpd    %xmm0,%xmm5            ## xmm5=rinv+ krsq
        mulpd    nb211_two(%esp),%xmm0
        subpd    nb211_crf(%esp),%xmm5
        subpd    %xmm0,%xmm7            ## xmm7=rinv-2*krsq
        mulpd    nb211_qqH(%esp),%xmm5  ## vcoul
        mulpd    nb211_qqH(%esp),%xmm7
        mulpd    %xmm7,%xmm4            ## total fsH2 in xmm4
        addpd    nb211_vctot(%esp),%xmm5

        movapd   nb211_dxH2(%esp),%xmm0
        movapd   nb211_dyH2(%esp),%xmm1
        movapd   nb211_dzH2(%esp),%xmm2
        movapd   %xmm5,nb211_vctot(%esp)
        mulpd    %xmm4,%xmm0
        mulpd    %xmm4,%xmm1
        mulpd    %xmm4,%xmm2

        ## update H2 forces
        movapd   nb211_fixH2(%esp),%xmm3
        movapd   nb211_fiyH2(%esp),%xmm4
        movapd   nb211_fizH2(%esp),%xmm7
        addpd    %xmm0,%xmm3
        addpd    %xmm1,%xmm4
        addpd    %xmm2,%xmm7
        movapd   %xmm3,nb211_fixH2(%esp)
        movapd   %xmm4,nb211_fiyH2(%esp)
        movapd   %xmm7,nb211_fizH2(%esp)

        movl     nb211_faction(%ebp),%edi
        ## update j forces
        ## xmm0-xmm2 = summed O+H1+H2 force; the low lane belongs to the j
        ## particle indexed by eax, the high lane to the one indexed by ebx.
        addpd    nb211_fjx(%esp),%xmm0
        addpd    nb211_fjy(%esp),%xmm1
        addpd    nb211_fjz(%esp),%xmm2
        movlpd   (%edi,%eax,8),%xmm3
        movlpd   8(%edi,%eax,8),%xmm4
        movlpd   16(%edi,%eax,8),%xmm5
        movhpd   (%edi,%ebx,8),%xmm3
        movhpd   8(%edi,%ebx,8),%xmm4
        movhpd   16(%edi,%ebx,8),%xmm5
        subpd    %xmm0,%xmm3            ## faction[j] -= fj
        subpd    %xmm1,%xmm4
        subpd    %xmm2,%xmm5
        movlpd   %xmm3,(%edi,%eax,8)
        movlpd   %xmm4,8(%edi,%eax,8)
        movlpd   %xmm5,16(%edi,%eax,8)
        movhpd   %xmm3,(%edi,%ebx,8)
        movhpd   %xmm4,8(%edi,%ebx,8)
        movhpd   %xmm5,16(%edi,%ebx,8)

        ## should we do one more iteration?
        ## innerk is decremented by 2; a negative result leaves the loop.
        subl     $2,nb211_innerk(%esp)
        jl       _nb_kernel211_ia32_sse2.nb211_checksingle
        jmp      _nb_kernel211_ia32_sse2.nb211_unroll_loop
_nb_kernel211_ia32_sse2.nb211_checksingle:
        ## bit 0 of innerk set -> run the single-j path once; otherwise go
        ## straight to the outer-loop update.
        movl     nb211_innerk(%esp),%edx
        andl     $1,%edx
        jnz      _nb_kernel211_ia32_sse2.nb211_dosingle
        jmp      _nb_kernel211_ia32_sse2.nb211_updateouterdata
_nb_kernel211_ia32_sse2.nb211_dosingle:
        ## Single-j path: same arithmetic as above but on the low lane only
        ## (movlpd loads and scalar *sd ops).  This span ends before the
        ## path is complete.
        movl     nb211_innerjjnr(%esp),%edx     ## pointer to jjnr[k]
        movl     (%edx),%eax
        addl     $4,nb211_innerjjnr(%esp)       ## advance past one 4-byte entry

        movl     nb211_charge(%ebp),%esi        ## base of charge[]
        xorpd    %xmm3,%xmm3                    ## clear so the high lane stays 0
        movlpd   (%esi,%eax,8),%xmm3
        movapd   %xmm3,%xmm4
        mulpd    nb211_iqO(%esp),%xmm3
        mulpd    nb211_iqH(%esp),%xmm4

        movd     %eax,%mm0              ## use mmx registers as temp storage
        movapd   %xmm3,nb211_qqO(%esp)
        movapd   %xmm4,nb211_qqH(%esp)

        movl     nb211_type(%ebp),%esi
        movl     (%esi,%eax,4),%eax
        movl     nb211_vdwparam(%ebp),%esi
        shll     %eax                   ## one-operand form: shift left by 1 (type*2)
        movl     nb211_ntia(%esp),%edi
        addl     %edi,%eax

        movlpd   (%esi,%eax,8),%xmm6    ## c6a
        movhpd   8(%esi,%eax,8),%xmm6   ## c6a c12a
        xorpd    %xmm7,%xmm7
        movapd   %xmm6,%xmm4
        unpcklpd %xmm7,%xmm4            ## xmm4 = (c6, 0)
        unpckhpd %xmm7,%xmm6            ## xmm6 = (c12, 0)

        movd     %mm0,%eax              ## restore jnr
        movapd   %xmm4,nb211_c6(%esp)
        movapd   %xmm6,nb211_c12(%esp)

        movl     nb211_pos(%ebp),%esi   ## base of pos[]
        leal     (%eax,%eax,2),%eax     ## replace jnr with j3

        ## move coordinates to xmm0-xmm2
        movlpd   (%esi,%eax,8),%xmm0
        movlpd   8(%esi,%eax,8),%xmm1
        movlpd   16(%esi,%eax,8),%xmm2

        ## move ixO-izO to xmm4-xmm6
        movapd   nb211_ixO(%esp),%xmm4
        movapd   nb211_iyO(%esp),%xmm5
        movapd   nb211_izO(%esp),%xmm6

        ## calc dr
        subsd    %xmm0,%xmm4
        subsd    %xmm1,%xmm5
        subsd    %xmm2,%xmm6

        ## store dr
        movapd   %xmm4,nb211_dxO(%esp)
        movapd   %xmm5,nb211_dyO(%esp)
        movapd   %xmm6,nb211_dzO(%esp)
        ## square it
        mulsd    %xmm4,%xmm4
        mulsd    %xmm5,%xmm5
        mulsd    %xmm6,%xmm6
        addsd    %xmm5,%xmm4
        addsd    %xmm6,%xmm4
        movapd   %xmm4,%xmm7
        ## rsqO in xmm7

        ## move ixH1-izH1 to xmm4-xmm6
        movapd   nb211_ixH1(%esp),%xmm4
        movapd   nb211_iyH1(%esp),%xmm5
        movapd   nb211_izH1(%esp),%xmm6

        ## calc dr
        subsd    %xmm0,%xmm4
        subsd    %xmm1,%xmm5
        subsd    %xmm2,%xmm6

        ## store dr
        movapd   %xmm4,nb211_dxH1(%esp)
        movapd   %xmm5,nb211_dyH1(%esp)
        movapd   %xmm6,nb211_dzH1(%esp)
        ## square it
        mulsd    %xmm4,%xmm4
        mulsd    %xmm5,%xmm5
        mulsd    %xmm6,%xmm6
        addsd    %xmm5,%xmm6
        addsd    %xmm4,%xmm6
        ## rsqH1 in xmm6

        ## move ixH2-izH2 to xmm3-xmm5
        movapd   nb211_ixH2(%esp),%xmm3
        movapd   nb211_iyH2(%esp),%xmm4
        movapd   nb211_izH2(%esp),%xmm5

        ## calc dr
        subsd    %xmm0,%xmm3
        subsd    %xmm1,%xmm4
        subsd    %xmm2,%xmm5

        ## store dr
        movapd   %xmm3,nb211_dxH2(%esp)
        movapd   %xmm4,nb211_dyH2(%esp)
        movapd   %xmm5,nb211_dzH2(%esp)
        ## square it
        mulsd    %xmm3,%xmm3
        mulsd    %xmm4,%xmm4
        mulsd    %xmm5,%xmm5
        addsd    %xmm4,%xmm5
        addsd    %xmm3,%xmm5
        ## rsqH2 in xmm5, rsqH1 in xmm6, rsqO in xmm7

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?