nb_kernel204_x86_64_sse2.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 2,256 行 · 第 1/5 页

S
2,256
字号
    subpd   %xmm7,%xmm11 ## 3-rsq*lu*lu        mulpd   %xmm2,%xmm9        mulpd   %xmm5,%xmm10    mulpd   %xmm8,%xmm11 ## lu*(3-rsq*lu*lu)        movapd  nb204_half(%rsp),%xmm15        mulpd   %xmm15,%xmm9 ## first iteration for rinvH1H1         mulpd   %xmm15,%xmm10 ## first iteration for rinvH2H1    mulpd   %xmm15,%xmm11 ## first iteration for rinvMH1     ## second iteration step            movapd  %xmm9,%xmm2        movapd  %xmm10,%xmm5    movapd  %xmm11,%xmm8        mulpd   %xmm2,%xmm2 ## lu*lu        mulpd   %xmm5,%xmm5 ## lu*lu    mulpd   %xmm8,%xmm8 ## lu*lu        movapd  nb204_three(%rsp),%xmm1        movapd  %xmm1,%xmm4    movapd  %xmm1,%xmm7        mulpd   %xmm0,%xmm2 ## rsq*lu*lu        mulpd   %xmm3,%xmm5 ## rsq*lu*lu     mulpd   %xmm6,%xmm8 ## rsq*lu*lu        subpd   %xmm2,%xmm1        subpd   %xmm5,%xmm4    subpd   %xmm8,%xmm7 ## 3-rsq*lu*lu        mulpd   %xmm1,%xmm9        mulpd   %xmm4,%xmm10    mulpd   %xmm7,%xmm11 ## lu*(3-rsq*lu*lu)        movapd  nb204_half(%rsp),%xmm15        mulpd   %xmm15,%xmm9 ##  rinvH1H1         mulpd   %xmm15,%xmm10 ##   rinvH2H1    mulpd   %xmm15,%xmm11 ##   rinvMH1        ## H1 interactions     ## rsq in xmm0,xmm3,xmm6      ## rinv in xmm9, xmm10, xmm11    movapd %xmm9,%xmm1 ## copy of rinv    movapd %xmm10,%xmm4    movapd %xmm11,%xmm7    movapd nb204_krf(%rsp),%xmm2    mulpd  %xmm9,%xmm9  ## rinvsq    mulpd  %xmm10,%xmm10    mulpd  %xmm11,%xmm11    mulpd  %xmm2,%xmm0 ## k*rsq    mulpd  %xmm2,%xmm3    mulpd  %xmm2,%xmm6    movapd %xmm0,%xmm2 ## copy of k*rsq    movapd %xmm3,%xmm5    movapd %xmm6,%xmm8    addpd  %xmm1,%xmm2 ## rinv+krsq    addpd  %xmm4,%xmm5    addpd  %xmm7,%xmm8    movapd nb204_crf(%rsp),%xmm14    subpd  %xmm14,%xmm2  ## rinv+krsq-crf    subpd  %xmm14,%xmm5    subpd  %xmm14,%xmm8    movapd nb204_qqHH(%rsp),%xmm12    movapd nb204_qqMH(%rsp),%xmm13    mulpd  %xmm12,%xmm2 ## voul=qq*(rinv+ krsq-crf)    mulpd  %xmm12,%xmm5 ## voul=qq*(rinv+ krsq-crf)    mulpd  %xmm13,%xmm8 ## voul=qq*(rinv+ krsq-crf)    
addpd  %xmm0,%xmm0 ## 2*krsq    addpd  %xmm3,%xmm3    addpd  %xmm6,%xmm6    subpd  %xmm0,%xmm1 ## rinv-2*krsq    subpd  %xmm3,%xmm4    subpd  %xmm6,%xmm7    mulpd  %xmm12,%xmm1  ## (rinv-2*krsq)*qq    mulpd  %xmm12,%xmm4    mulpd  %xmm13,%xmm7    addpd  nb204_vctot(%rsp),%xmm2    addpd  %xmm8,%xmm5    addpd  %xmm5,%xmm2    movapd %xmm2,nb204_vctot(%rsp)    mulpd  %xmm1,%xmm9  ## fscal    mulpd  %xmm4,%xmm10    mulpd  %xmm7,%xmm11    ## move j H1 forces to xmm0-xmm2        movlpd 24(%rdi,%rax,8),%xmm0        movlpd 32(%rdi,%rax,8),%xmm1        movlpd 40(%rdi,%rax,8),%xmm2        movhpd 24(%rdi,%rbx,8),%xmm0        movhpd 32(%rdi,%rbx,8),%xmm1        movhpd 40(%rdi,%rbx,8),%xmm2    movapd %xmm9,%xmm7    movapd %xmm9,%xmm8    movapd %xmm11,%xmm13    movapd %xmm11,%xmm14    movapd %xmm11,%xmm15    movapd %xmm10,%xmm11    movapd %xmm10,%xmm12        mulpd nb204_dxH1H1(%rsp),%xmm7        mulpd nb204_dyH1H1(%rsp),%xmm8        mulpd nb204_dzH1H1(%rsp),%xmm9        mulpd nb204_dxH2H1(%rsp),%xmm10        mulpd nb204_dyH2H1(%rsp),%xmm11        mulpd nb204_dzH2H1(%rsp),%xmm12        mulpd nb204_dxMH1(%rsp),%xmm13        mulpd nb204_dyMH1(%rsp),%xmm14        mulpd nb204_dzMH1(%rsp),%xmm15    addpd %xmm7,%xmm0    addpd %xmm8,%xmm1    addpd %xmm9,%xmm2    addpd nb204_fixH1(%rsp),%xmm7    addpd nb204_fiyH1(%rsp),%xmm8    addpd nb204_fizH1(%rsp),%xmm9    addpd %xmm10,%xmm0    addpd %xmm11,%xmm1    addpd %xmm12,%xmm2    addpd nb204_fixH2(%rsp),%xmm10    addpd nb204_fiyH2(%rsp),%xmm11    addpd nb204_fizH2(%rsp),%xmm12    addpd %xmm13,%xmm0    addpd %xmm14,%xmm1    addpd %xmm15,%xmm2    addpd nb204_fixM(%rsp),%xmm13    addpd nb204_fiyM(%rsp),%xmm14    addpd nb204_fizM(%rsp),%xmm15    movapd %xmm7,nb204_fixH1(%rsp)    movapd %xmm8,nb204_fiyH1(%rsp)    movapd %xmm9,nb204_fizH1(%rsp)    movapd %xmm10,nb204_fixH2(%rsp)    movapd %xmm11,nb204_fiyH2(%rsp)    movapd %xmm12,nb204_fizH2(%rsp)    movapd %xmm13,nb204_fixM(%rsp)    movapd %xmm14,nb204_fiyM(%rsp)    movapd %xmm15,nb204_fizM(%rsp)  
  ## store back j H1 forces from xmm0-xmm2        movlpd %xmm0,24(%rdi,%rax,8)        movlpd %xmm1,32(%rdi,%rax,8)        movlpd %xmm2,40(%rdi,%rax,8)        movhpd %xmm0,24(%rdi,%rbx,8)        movhpd %xmm1,32(%rdi,%rbx,8)        movhpd %xmm2,40(%rdi,%rbx,8)        ## move j H2 coordinates to local temp variables     movlpd 48(%rsi,%rax,8),%xmm0    movlpd 56(%rsi,%rax,8),%xmm1    movlpd 64(%rsi,%rax,8),%xmm2    movhpd 48(%rsi,%rbx,8),%xmm0    movhpd 56(%rsi,%rbx,8),%xmm1    movhpd 64(%rsi,%rbx,8),%xmm2    ## xmm0 = H2x    ## xmm1 = H2y    ## xmm2 = H2z    movapd %xmm0,%xmm3    movapd %xmm1,%xmm4    movapd %xmm2,%xmm5    movapd %xmm0,%xmm6    movapd %xmm1,%xmm7    movapd %xmm2,%xmm8    subpd nb204_ixH1(%rsp),%xmm0    subpd nb204_iyH1(%rsp),%xmm1    subpd nb204_izH1(%rsp),%xmm2    subpd nb204_ixH2(%rsp),%xmm3    subpd nb204_iyH2(%rsp),%xmm4    subpd nb204_izH2(%rsp),%xmm5    subpd nb204_ixM(%rsp),%xmm6    subpd nb204_iyM(%rsp),%xmm7    subpd nb204_izM(%rsp),%xmm8        movapd %xmm0,nb204_dxH1H2(%rsp)        movapd %xmm1,nb204_dyH1H2(%rsp)        movapd %xmm2,nb204_dzH1H2(%rsp)        mulpd  %xmm0,%xmm0        mulpd  %xmm1,%xmm1        mulpd  %xmm2,%xmm2        movapd %xmm3,nb204_dxH2H2(%rsp)        movapd %xmm4,nb204_dyH2H2(%rsp)        movapd %xmm5,nb204_dzH2H2(%rsp)        mulpd  %xmm3,%xmm3        mulpd  %xmm4,%xmm4        mulpd  %xmm5,%xmm5        movapd %xmm6,nb204_dxMH2(%rsp)        movapd %xmm7,nb204_dyMH2(%rsp)        movapd %xmm8,nb204_dzMH2(%rsp)        mulpd  %xmm6,%xmm6        mulpd  %xmm7,%xmm7        mulpd  %xmm8,%xmm8        addpd  %xmm1,%xmm0        addpd  %xmm2,%xmm0        addpd  %xmm4,%xmm3        addpd  %xmm5,%xmm3    addpd  %xmm7,%xmm6    addpd  %xmm8,%xmm6        ## start doing invsqrt for jH2 atoms    cvtpd2ps %xmm0,%xmm1    cvtpd2ps %xmm3,%xmm4    cvtpd2ps %xmm6,%xmm7        rsqrtps %xmm1,%xmm1        rsqrtps %xmm4,%xmm4    rsqrtps %xmm7,%xmm7    cvtps2pd %xmm1,%xmm1    cvtps2pd %xmm4,%xmm4    cvtps2pd %xmm7,%xmm7        movapd  %xmm1,%xmm2   
     movapd  %xmm4,%xmm5    movapd  %xmm7,%xmm8        mulpd   %xmm1,%xmm1 ## lu*lu        mulpd   %xmm4,%xmm4 ## lu*lu    mulpd   %xmm7,%xmm7 ## lu*lu        movapd  nb204_three(%rsp),%xmm9        movapd  %xmm9,%xmm10    movapd  %xmm9,%xmm11        mulpd   %xmm0,%xmm1 ## rsq*lu*lu        mulpd   %xmm3,%xmm4 ## rsq*lu*lu     mulpd   %xmm6,%xmm7 ## rsq*lu*lu        subpd   %xmm1,%xmm9        subpd   %xmm4,%xmm10    subpd   %xmm7,%xmm11 ## 3-rsq*lu*lu        mulpd   %xmm2,%xmm9        mulpd   %xmm5,%xmm10    mulpd   %xmm8,%xmm11 ## lu*(3-rsq*lu*lu)        movapd  nb204_half(%rsp),%xmm15        mulpd   %xmm15,%xmm9 ## first iteration for rinvH1H2         mulpd   %xmm15,%xmm10 ## first iteration for rinvH2H2    mulpd   %xmm15,%xmm11 ## first iteration for rinvMH2    ## second iteration step            movapd  %xmm9,%xmm2        movapd  %xmm10,%xmm5    movapd  %xmm11,%xmm8        mulpd   %xmm2,%xmm2 ## lu*lu        mulpd   %xmm5,%xmm5 ## lu*lu    mulpd   %xmm8,%xmm8 ## lu*lu        movapd  nb204_three(%rsp),%xmm1        movapd  %xmm1,%xmm4    movapd  %xmm1,%xmm7        mulpd   %xmm0,%xmm2 ## rsq*lu*lu        mulpd   %xmm3,%xmm5 ## rsq*lu*lu     mulpd   %xmm6,%xmm8 ## rsq*lu*lu        subpd   %xmm2,%xmm1        subpd   %xmm5,%xmm4    subpd   %xmm8,%xmm7 ## 3-rsq*lu*lu        mulpd   %xmm1,%xmm9        mulpd   %xmm4,%xmm10    mulpd   %xmm7,%xmm11 ## lu*(3-rsq*lu*lu)        movapd  nb204_half(%rsp),%xmm15        mulpd   %xmm15,%xmm9 ##  rinvH1H2        mulpd   %xmm15,%xmm10 ##   rinvH2H2    mulpd   %xmm15,%xmm11 ##   rinvMH2        ## H2 interactions     ## rsq in xmm0,xmm3,xmm6      ## rinv in xmm9, xmm10, xmm11    movapd %xmm9,%xmm1 ## copy of rinv    movapd %xmm10,%xmm4    movapd %xmm11,%xmm7    movapd nb204_krf(%rsp),%xmm2    mulpd  %xmm9,%xmm9  ## rinvsq    mulpd  %xmm10,%xmm10    mulpd  %xmm11,%xmm11    mulpd  %xmm2,%xmm0 ## k*rsq    mulpd  %xmm2,%xmm3    mulpd  %xmm2,%xmm6    movapd %xmm0,%xmm2 ## copy of k*rsq    movapd %xmm3,%xmm5    movapd %xmm6,%xmm8    addpd  
%xmm1,%xmm2 ## rinv+krsq    addpd  %xmm4,%xmm5    addpd  %xmm7,%xmm8    movapd nb204_crf(%rsp),%xmm14    subpd  %xmm14,%xmm2  ## rinv+krsq-crf    subpd  %xmm14,%xmm5    subpd  %xmm14,%xmm8    movapd nb204_qqHH(%rsp),%xmm12    movapd nb204_qqMH(%rsp),%xmm13    mulpd  %xmm12,%xmm2 ## xmm2=voul=qq*(rinv+ krsq-crf)    mulpd  %xmm12,%xmm5 ## xmm5=voul=qq*(rinv+ krsq-crf)    mulpd  %xmm13,%xmm8 ## xmm8=voul=qq*(rinv+ krsq-crf)    addpd  %xmm0,%xmm0 ## 2*krsq    addpd  %xmm3,%xmm3    addpd  %xmm6,%xmm6    subpd  %xmm0,%xmm1 ## rinv-2*krsq    subpd  %xmm3,%xmm4    subpd  %xmm6,%xmm7    mulpd  %xmm12,%xmm1  ## (rinv-2*krsq)*qq    mulpd  %xmm12,%xmm4    mulpd  %xmm13,%xmm7    addpd  nb204_vctot(%rsp),%xmm2    addpd  %xmm8,%xmm5    addpd  %xmm5,%xmm2    movapd  %xmm2,nb204_vctot(%rsp)    mulpd  %xmm1,%xmm9  ## fscal    mulpd  %xmm4,%xmm10    mulpd  %xmm7,%xmm11    ## move j H2 forces to xmm0-xmm2        movlpd 48(%rdi,%rax,8),%xmm0        movlpd 56(%rdi,%rax,8),%xmm1        movlpd 64(%rdi,%rax,8),%xmm2        movhpd 48(%rdi,%rbx,8),%xmm0        movhpd 56(%rdi,%rbx,8),%xmm1        movhpd 64(%rdi,%rbx,8),%xmm2    movapd %xmm9,%xmm7    movapd %xmm9,%xmm8    movapd %xmm11,%xmm13    movapd %xmm11,%xmm14    movapd %xmm11,%xmm15    movapd %xmm10,%xmm11    movapd %xmm10,%xmm12        mulpd nb204_dxH1H2(%rsp),%xmm7        mulpd nb204_dyH1H2(%rsp),%xmm8        mulpd nb204_dzH1H2(%rsp),%xmm9        mulpd nb204_dxH2H2(%rsp),%xmm10        mulpd nb204_dyH2H2(%rsp),%xmm11        mulpd nb204_dzH2H2(%rsp),%xmm12        mulpd nb204_dxMH2(%rsp),%xmm13        mulpd nb204_dyMH2(%rsp),%xmm14        mulpd nb204_dzMH2(%rsp),%xmm15    addpd %xmm7,%xmm0    addpd %xmm8,%xmm1    addpd %xmm9,%xmm2    addpd nb204_fixH1(%rsp),%xmm7    addpd nb204_fiyH1(%rsp),%xmm8    addpd nb204_fizH1(%rsp),%xmm9    addpd %xmm10,%xmm0    addpd %xmm11,%xmm1    addpd %xmm12,%xmm2    addpd nb204_fixH2(%rsp),%xmm10    addpd nb204_fiyH2(%rsp),%xmm11    addpd nb204_fizH2(%rsp),%xmm12    addpd %xmm13,%xmm0    addpd %xmm14,%xmm1   
 addpd %xmm15,%xmm2    addpd nb204_fixM(%rsp),%xmm13    addpd nb204_fiyM(%rsp),%xmm14    addpd nb204_fizM(%rsp),%xmm15    movapd %xmm7,nb204_fixH1(%rsp)    movapd %xmm8,nb204_fiyH1(%rsp)    movapd %xmm9,nb204_fizH1(%rsp)    movapd %xmm10,nb204_fixH2(%rsp)    movapd %xmm11,nb204_fiyH2(%rsp)    movapd %xmm12,nb204_fizH2(%rsp)    movapd %xmm13,nb204_fixM(%rsp)    movapd %xmm14,nb204_fiyM(%rsp)    movapd %xmm15,nb204_fizM(%rsp)    ## store back j H2 forces from xmm0-xmm2        movlpd %xmm0,48(%rdi,%rax,8)        movlpd %xmm1,56(%rdi,%rax,8)        movlpd %xmm2,64(%rdi,%rax,8)        movhpd %xmm0,48(%rdi,%rbx,8)        movhpd %xmm1,56(%rdi,%rbx,8)        movhpd %xmm2,64(%rdi,%rbx,8)        ## move j M coordinates to local temp variables     movlpd 72(%rsi,%rax,8),%xmm0    movlpd 80(%rsi,%rax,8),%xmm1    movlpd 88(%rsi,%rax,8),%xmm2    movhpd 72(%rsi,%rbx,8),%xmm0    movhpd 80(%rsi,%rbx,8),%xmm1    movhpd 88(%rsi,%rbx,8),%xmm2    ## xmm0 = Mx    ## xmm1 = My    ## xmm2 = Mz    movapd %xmm0,%xmm3    movapd %xmm1,%xmm4    movapd %xmm2,%xmm5    movapd %xmm0,%xmm6    movapd %xmm1,%xmm7    movapd %xmm2,%xmm8    subpd nb204_ixH1(%rsp),%xmm0    subpd nb204_iyH1(%rsp),%xmm1    subpd nb204_izH1(%rsp),%xmm2    subpd nb204_ixH2(%rsp),%xmm3    subpd nb204_iyH2(%rsp),%xmm4    subpd nb204_izH2(%rsp),%xmm5    subpd nb204_ixM(%rsp),%xmm6    subpd nb204_iyM(%rsp),%xmm7    subpd nb204_izM(%rsp),%xmm8        movapd %xmm0,nb204_dxH1M(%rsp)        movapd %xmm1,nb204_dyH1M(%rsp)        movapd %xmm2,nb204_dzH1M(%rsp)        mulpd  %xmm0,%xmm0        mulpd  %xmm1,%xmm1        mulpd  %xmm2,%xmm2        movapd %xmm3,nb204_dxH2M(%rsp)        movapd %xmm4,nb204_dyH2M(%rsp)        movapd %xmm5,nb204_dzH2M(%rsp)        mulpd  %xmm3,%xmm3        mulpd  %xmm4,%xmm4        mulpd  %xmm5,%xmm5        movapd %xmm6,nb204_dxMM(%rsp)        movapd %xmm7,nb204_dyMM(%rsp)        movapd %xmm8,nb204_dzMM(%rsp)        mulpd  %xmm6,%xmm6        mulpd  %xmm7,%xmm7        mulpd  %xmm8,%xmm8        addpd  %xmm1,%xmm0 
       addpd  %xmm2,%xmm0        addpd  %xmm4,%xmm3        addpd  %xmm5,%xmm3    addpd  %xmm7,%xmm6    addpd  %xmm8,%xmm6        ## start doing invsqrt for jM atoms    cvtpd2ps %xmm0,%xmm1    cvtpd2ps %xmm3,%xmm4    cvtpd2ps %xmm6,%xmm7        rsqrtps %xmm1,%xmm1        rsqrtps %xmm4,%xmm4    rsqrtps %xmm7,%xmm7    cvtps2pd %xmm1,%xmm1

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?