nb_kernel211_ia32_sse2.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 1,984 行 · 第 1/5 页

S
1,984
字号
        ## ---------------------------------------------------------------
        ## Visible tail of nb_kernel211_ia32_sse2 (AT&T syntax, ia32, SSE2).
        ## The routine begins before this excerpt, so register contents on
        ## entry are inferred from the stores and comments below:
        ##   xmm5/xmm6/xmm7 appear to hold rsqH2/rsqH1/rsqO (they are scaled
        ##   by nb211_krf and stored as krsqH2/krsqH1/krsqO) - TODO confirm
        ##   against the code that precedes this excerpt.
        ## Only scalar (low-half) arithmetic is used in this part; movlpd
        ## stores write just the low 64 bits of each stack slot.
        ## ---------------------------------------------------------------

        ## krsq = krf * rsq for H2, H1 and O
        movapd   %xmm5,%xmm0
        movapd   %xmm6,%xmm1
        movapd   %xmm7,%xmm2
        mulsd    nb211_krf(%esp),%xmm0
        mulsd    nb211_krf(%esp),%xmm1
        mulsd    nb211_krf(%esp),%xmm2
        movapd   %xmm0,nb211_krsqH2(%esp)
        movapd   %xmm1,nb211_krsqH1(%esp)
        movapd   %xmm2,nb211_krsqO(%esp)

        ## Each of the three sections below takes a single-precision
        ## rsqrtss estimate (lu) and refines it twice in double precision
        ## with lu' = half * lu * (three - rsq*lu*lu).
        ## NOTE(review): nb211_three / nb211_half presumably hold 3.0 and
        ## 0.5; their initialisation is outside this excerpt.

        ## start with rsqO - put seed in xmm2
        cvtsd2ss %xmm7,%xmm2
        rsqrtss  %xmm2,%xmm2
        cvtss2sd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulsd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulsd    %xmm7,%xmm2                    ## rsq*lu*lu
        subsd    %xmm2,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*(3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulsd    %xmm4,%xmm4                    ## lu*lu
        mulsd    %xmm4,%xmm7                    ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subsd    %xmm7,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*( 3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## rinv
        movapd   %xmm4,%xmm7                    ## rinvO in xmm7

        ## rsqH1 - seed in xmm2
        cvtsd2ss %xmm6,%xmm2
        rsqrtss  %xmm2,%xmm2
        cvtss2sd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulsd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulsd    %xmm6,%xmm2                    ## rsq*lu*lu
        subsd    %xmm2,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*(3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulsd    %xmm4,%xmm4                    ## lu*lu
        mulsd    %xmm4,%xmm6                    ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subsd    %xmm6,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*( 3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## rinv
        movapd   %xmm4,%xmm6                    ## rinvH1 in xmm6

        ## rsqH2 - seed in xmm2
        cvtsd2ss %xmm5,%xmm2
        rsqrtss  %xmm2,%xmm2
        cvtss2sd %xmm2,%xmm2

        movapd   %xmm2,%xmm3
        mulsd    %xmm2,%xmm2
        movapd   nb211_three(%esp),%xmm4
        mulsd    %xmm5,%xmm2                    ## rsq*lu*lu
        subsd    %xmm2,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*(3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## iter1 ( new lu)

        movapd   %xmm4,%xmm3
        mulsd    %xmm4,%xmm4                    ## lu*lu
        mulsd    %xmm4,%xmm5                    ## rsq*lu*lu
        movapd   nb211_three(%esp),%xmm4
        subsd    %xmm5,%xmm4                    ## 3-rsq*lu*lu
        mulsd    %xmm3,%xmm4                    ## lu*( 3-rsq*lu*lu)
        mulsd    nb211_half(%esp),%xmm4         ## rinv
        movapd   %xmm4,%xmm5                    ## rinvH2 in xmm5

        ## do O interactions
        ## (the only site in this excerpt that gets the c6/c12 terms in
        ## addition to the qq*(rinv + krsq - crf) term)
        movapd   %xmm7,%xmm4
        mulsd    %xmm4,%xmm4                    ## xmm7=rinv, xmm4=rinvsq
        movapd   %xmm4,%xmm1
        mulsd    %xmm4,%xmm1
        mulsd    %xmm4,%xmm1                    ## xmm1=rinvsix
        movapd   %xmm1,%xmm2
        mulsd    %xmm2,%xmm2                    ## xmm2=rinvtwelve
        mulsd    nb211_c6(%esp),%xmm1
        mulsd    nb211_c12(%esp),%xmm2
        movapd   %xmm2,%xmm3
        subsd    %xmm1,%xmm3                    ## Vvdw=Vvdw12-Vvdw6
        addsd    nb211_Vvdwtot(%esp),%xmm3
        mulsd    nb211_six(%esp),%xmm1
        mulsd    nb211_twelve(%esp),%xmm2
        subsd    %xmm1,%xmm2                    ## nb part of fs

        movapd   %xmm7,%xmm0
        movapd   nb211_krsqO(%esp),%xmm1
        addsd    %xmm1,%xmm0
        mulsd    nb211_two(%esp),%xmm1
        subsd    nb211_crf(%esp),%xmm0          ## xmm0=rinv+ krsq-crf
        subsd    %xmm1,%xmm7                    ## xmm7=rinv-2*krsq
        mulsd    nb211_qqO(%esp),%xmm0
        mulsd    nb211_qqO(%esp),%xmm7

        addsd    %xmm7,%xmm2
        mulsd    %xmm2,%xmm4                    ## total fsO in xmm4

        addsd    nb211_vctot(%esp),%xmm0
        movlpd   %xmm3,nb211_Vvdwtot(%esp)
        movlpd   %xmm0,nb211_vctot(%esp)

        movapd   nb211_dxO(%esp),%xmm0
        movapd   nb211_dyO(%esp),%xmm1
        movapd   nb211_dzO(%esp),%xmm2
        mulsd    %xmm4,%xmm0
        mulsd    %xmm4,%xmm1
        mulsd    %xmm4,%xmm2

        ## update O forces
        movapd   nb211_fixO(%esp),%xmm3
        movapd   nb211_fiyO(%esp),%xmm4
        movapd   nb211_fizO(%esp),%xmm7
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm7
        movlpd   %xmm3,nb211_fixO(%esp)
        movlpd   %xmm4,nb211_fiyO(%esp)
        movlpd   %xmm7,nb211_fizO(%esp)
        ## update j forces with water O
        movlpd   %xmm0,nb211_fjx(%esp)
        movlpd   %xmm1,nb211_fjy(%esp)
        movlpd   %xmm2,nb211_fjz(%esp)

        ## H1 interactions
        movapd   %xmm6,%xmm4
        mulsd    %xmm4,%xmm4                    ## xmm6=rinv, xmm4=rinvsq
        movapd   %xmm6,%xmm7
        movapd   nb211_krsqH1(%esp),%xmm0
        addsd    %xmm0,%xmm6                    ## xmm6=rinv+ krsq
        mulsd    nb211_two(%esp),%xmm0
        subsd    nb211_crf(%esp),%xmm6
        subsd    %xmm0,%xmm7                    ## xmm7=rinv-2*krsq
        mulsd    nb211_qqH(%esp),%xmm6          ## vcoul
        mulsd    nb211_qqH(%esp),%xmm7
        mulsd    %xmm7,%xmm4                    ## total fsH1 in xmm4

        addsd    nb211_vctot(%esp),%xmm6

        movapd   nb211_dxH1(%esp),%xmm0
        movapd   nb211_dyH1(%esp),%xmm1
        movapd   nb211_dzH1(%esp),%xmm2
        movlpd   %xmm6,nb211_vctot(%esp)
        mulsd    %xmm4,%xmm0
        mulsd    %xmm4,%xmm1
        mulsd    %xmm4,%xmm2

        ## update H1 forces
        movapd   nb211_fixH1(%esp),%xmm3
        movapd   nb211_fiyH1(%esp),%xmm4
        movapd   nb211_fizH1(%esp),%xmm7
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm7
        movlpd   %xmm3,nb211_fixH1(%esp)
        movlpd   %xmm4,nb211_fiyH1(%esp)
        movlpd   %xmm7,nb211_fizH1(%esp)
        ## update j forces with water H1
        addsd    nb211_fjx(%esp),%xmm0
        addsd    nb211_fjy(%esp),%xmm1
        addsd    nb211_fjz(%esp),%xmm2
        movlpd   %xmm0,nb211_fjx(%esp)
        movlpd   %xmm1,nb211_fjy(%esp)
        movlpd   %xmm2,nb211_fjz(%esp)

        ## H2 interactions
        movapd   %xmm5,%xmm4
        mulsd    %xmm4,%xmm4                    ## xmm5=rinv, xmm4=rinvsq
        movapd   %xmm5,%xmm7
        movapd   nb211_krsqH2(%esp),%xmm0
        addsd    %xmm0,%xmm5                    ## xmm5=rinv+ krsq
        mulsd    nb211_two(%esp),%xmm0
        subsd    nb211_crf(%esp),%xmm5
        subsd    %xmm0,%xmm7                    ## xmm7=rinv-2*krsq
        mulsd    nb211_qqH(%esp),%xmm5          ## vcoul
        mulsd    nb211_qqH(%esp),%xmm7
        mulsd    %xmm7,%xmm4                    ## total fsH2 in xmm4

        addsd    nb211_vctot(%esp),%xmm5

        movapd   nb211_dxH2(%esp),%xmm0
        movapd   nb211_dyH2(%esp),%xmm1
        movapd   nb211_dzH2(%esp),%xmm2
        movlpd   %xmm5,nb211_vctot(%esp)
        mulsd    %xmm4,%xmm0
        mulsd    %xmm4,%xmm1
        mulsd    %xmm4,%xmm2

        ## update H2 forces
        movapd   nb211_fixH2(%esp),%xmm3
        movapd   nb211_fiyH2(%esp),%xmm4
        movapd   nb211_fizH2(%esp),%xmm7
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm7
        movlpd   %xmm3,nb211_fixH2(%esp)
        movlpd   %xmm4,nb211_fiyH2(%esp)
        movlpd   %xmm7,nb211_fizH2(%esp)

        movl     nb211_faction(%ebp),%edi
        ## update j forces
        ## xmm0-xmm2 become the summed O+H1+H2 contribution, which is then
        ## subtracted from the three doubles at faction + eax*8.
        ## NOTE(review): eax is set before this excerpt - presumably the
        ## j-atom coordinate index; confirm against the preceding code.
        addsd    nb211_fjx(%esp),%xmm0
        addsd    nb211_fjy(%esp),%xmm1
        addsd    nb211_fjz(%esp),%xmm2

        movlpd   (%edi,%eax,8),%xmm3
        movlpd   8(%edi,%eax,8),%xmm4
        movlpd   16(%edi,%eax,8),%xmm5
        subsd    %xmm0,%xmm3
        subsd    %xmm1,%xmm4
        subsd    %xmm2,%xmm5
        movlpd   %xmm3,(%edi,%eax,8)
        movlpd   %xmm4,8(%edi,%eax,8)
        movlpd   %xmm5,16(%edi,%eax,8)

_nb_kernel211_ia32_sse2.nb211_updateouterdata:
        ## Fold the per-i accumulators kept on the stack into the arrays
        ## reached through the ebp-relative pointers: the forces at
        ## faction + ii3*8, the shift force at fshift + is3*8, and the two
        ## energies indexed by gid[n].
        movl     nb211_ii3(%esp),%ecx
        movl     nb211_faction(%ebp),%edi
        movl     nb211_fshift(%ebp),%esi
        movl     nb211_is3(%esp),%edx

        ## accumulate  Oi forces in xmm0, xmm1, xmm2
        movapd   nb211_fixO(%esp),%xmm0
        movapd   nb211_fiyO(%esp),%xmm1
        movapd   nb211_fizO(%esp),%xmm2

        ## movhlps copies the high half into the low half of the scratch
        ## register so the two halves can be added with addsd
        movhlps  %xmm0,%xmm3
        movhlps  %xmm1,%xmm4
        movhlps  %xmm2,%xmm5
        addsd    %xmm3,%xmm0
        addsd    %xmm4,%xmm1
        addsd    %xmm5,%xmm2                    ## sum is in low xmm0-xmm2

        movapd   %xmm0,%xmm3
        movapd   %xmm1,%xmm4
        movapd   %xmm2,%xmm5

        ## increment i force
        movsd    (%edi,%ecx,8),%xmm3
        movsd    8(%edi,%ecx,8),%xmm4
        movsd    16(%edi,%ecx,8),%xmm5
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm5
        movsd    %xmm3,(%edi,%ecx,8)
        movsd    %xmm4,8(%edi,%ecx,8)
        movsd    %xmm5,16(%edi,%ecx,8)

        ## accumulate force in xmm6/xmm7 for fshift
        ## (xmm6 = packed {x, y}, low xmm7 = z)
        movapd   %xmm0,%xmm6
        movsd    %xmm2,%xmm7
        unpcklpd %xmm1,%xmm6

        ## accumulate H1i forces in xmm0, xmm1, xmm2
        movapd   nb211_fixH1(%esp),%xmm0
        movapd   nb211_fiyH1(%esp),%xmm1
        movapd   nb211_fizH1(%esp),%xmm2

        movhlps  %xmm0,%xmm3
        movhlps  %xmm1,%xmm4
        movhlps  %xmm2,%xmm5
        addsd    %xmm3,%xmm0
        addsd    %xmm4,%xmm1
        addsd    %xmm5,%xmm2                    ## sum is in low xmm0-xmm2

        ## increment i force
        movsd    24(%edi,%ecx,8),%xmm3
        movsd    32(%edi,%ecx,8),%xmm4
        movsd    40(%edi,%ecx,8),%xmm5
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm5
        movsd    %xmm3,24(%edi,%ecx,8)
        movsd    %xmm4,32(%edi,%ecx,8)
        movsd    %xmm5,40(%edi,%ecx,8)

        ## accumulate force in xmm6/xmm7 for fshift
        addsd    %xmm2,%xmm7
        unpcklpd %xmm1,%xmm0
        addpd    %xmm0,%xmm6

        ## accumulate H2i forces in xmm0, xmm1, xmm2
        movapd   nb211_fixH2(%esp),%xmm0
        movapd   nb211_fiyH2(%esp),%xmm1
        movapd   nb211_fizH2(%esp),%xmm2

        movhlps  %xmm0,%xmm3
        movhlps  %xmm1,%xmm4
        movhlps  %xmm2,%xmm5
        addsd    %xmm3,%xmm0
        addsd    %xmm4,%xmm1
        addsd    %xmm5,%xmm2                    ## sum is in low xmm0-xmm2

        movapd   %xmm0,%xmm3
        movapd   %xmm1,%xmm4
        movapd   %xmm2,%xmm5

        ## increment i force
        movsd    48(%edi,%ecx,8),%xmm3
        movsd    56(%edi,%ecx,8),%xmm4
        movsd    64(%edi,%ecx,8),%xmm5
        addsd    %xmm0,%xmm3
        addsd    %xmm1,%xmm4
        addsd    %xmm2,%xmm5
        movsd    %xmm3,48(%edi,%ecx,8)
        movsd    %xmm4,56(%edi,%ecx,8)
        movsd    %xmm5,64(%edi,%ecx,8)

        ## accumulate force in xmm6/xmm7 for fshift
        addsd    %xmm2,%xmm7
        unpcklpd %xmm1,%xmm0
        addpd    %xmm0,%xmm6

        ## increment fshift force
        movlpd   (%esi,%edx,8),%xmm3
        movhpd   8(%esi,%edx,8),%xmm3
        movsd    16(%esi,%edx,8),%xmm4
        addpd    %xmm6,%xmm3
        addsd    %xmm7,%xmm4
        movlpd   %xmm3,(%esi,%edx,8)
        movhpd   %xmm3,8(%esi,%edx,8)
        movsd    %xmm4,16(%esi,%edx,8)

        ## get n from stack
        movl     nb211_n(%esp),%esi
        ## get group index for i particle
        movl     nb211_gid(%ebp),%edx           ## base of gid[]
        movl     (%edx,%esi,4),%edx             ## ggid=gid[n]

        ## accumulate total potential energy and update it
        movapd   nb211_vctot(%esp),%xmm7
        ## accumulate
        movhlps  %xmm7,%xmm6
        addsd    %xmm6,%xmm7                    ## low xmm7 has the sum now

        ## add earlier value from mem
        movl     nb211_Vc(%ebp),%eax
        addsd    (%eax,%edx,8),%xmm7
        ## move back to mem
        movsd    %xmm7,(%eax,%edx,8)

        ## accumulate total lj energy and update it
        movapd   nb211_Vvdwtot(%esp),%xmm7
        ## accumulate
        movhlps  %xmm7,%xmm6
        addsd    %xmm6,%xmm7                    ## low xmm7 has the sum now

        ## add earlier value from mem
        movl     nb211_Vvdw(%ebp),%eax
        addsd    (%eax,%edx,8),%xmm7
        ## move back to mem
        movsd    %xmm7,(%eax,%edx,8)

        ## finish if last
        movl     nb211_nn1(%esp),%ecx
        ## esi already loaded with n
        incl     %esi
        subl     %esi,%ecx                      ## ZF set when n+1 == nn1
        jz       _nb_kernel211_ia32_sse2.nb211_outerend

        ## not last, iterate outer loop once more!
        ## (the nb211_outer label is defined outside this excerpt)
        movl     %esi,nb211_n(%esp)
        jmp      _nb_kernel211_ia32_sse2.nb211_outer

_nb_kernel211_ia32_sse2.nb211_outerend:
        ## check if more outer neighborlists remain
        movl     nb211_nri(%esp),%ecx
        ## esi already loaded with n above
        subl     %esi,%ecx
        jz       _nb_kernel211_ia32_sse2.nb211_end
        ## non-zero, do one more workunit
        ## (the nb211_threadloop label is defined outside this excerpt)
        jmp      _nb_kernel211_ia32_sse2.nb211_threadloop

_nb_kernel211_ia32_sse2.nb211_end:
        ## NOTE(review): no MMX instructions are visible in this excerpt;
        ## confirm whether this emms is still required.
        emms

        ## write the nouter/ninner counters through the caller's pointers
        movl     nb211_nouter(%esp),%eax
        movl     nb211_ninner(%esp),%ebx
        movl     nb211_outeriter(%ebp),%ecx
        movl     nb211_inneriter(%ebp),%edx
        movl     %eax,(%ecx)
        movl     %ebx,(%edx)

        ## release the stack frame: first the amount saved in nb211_salign,
        ## then 812 bytes.
        ## NOTE(review): 812 must match the allocation in the prologue,
        ## which is outside this excerpt - TODO confirm.
        movl     nb211_salign(%esp),%eax
        addl     %eax,%esp
        addl     $812,%esp
        popl     %edi
        popl     %esi
        popl     %edx
        popl     %ecx
        popl     %ebx
        ## NOTE(review): the listing ends here; any remaining epilogue
        ## instructions (further pops / return) are not visible in this
        ## excerpt and have deliberately not been reconstructed.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?