nb_kernel103_x86_64_sse2.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 1,598 行 · 第 1/4 页

S
1,598
字号
    ## -----------------------------------------------------------------
    ## Tail of the unrolled inner loop body. The loop head (index and
    ## coordinate loads, rsq, the rsqrt seed) lies before this excerpt.
    ##
    ## The line breaks of this listing had been lost, which made every
    ## "##" comment swallow the instructions following it on the same
    ## physical line. One statement per line is restored below; the
    ## instruction sequence itself is unchanged.
    ##
    ## Register roles as used by the code below:
    ##   xmm0 / xmm3 / xmm6   rsq for H1 / H2 / M (multiplied in as rsq)
    ##   xmm9 / xmm10 / xmm11 lu -> rinv -> fscal for H1 / H2 / M
    ##   rdi                  faction base (loaded below)
    ##   rax, rbx             element offsets of the two j atoms
    ##                        (low lane / high lane of each register)
    ## All nb103_* stack slots are accessed with movapd, which requires
    ## them to be 16-byte aligned.
    ## -----------------------------------------------------------------

    ## finish the first iteration: lu = 0.5*lu*(3-rsq*lu*lu)
    subpd   %xmm7,%xmm11                ## 3-rsq*lu*lu
    mulpd   %xmm2,%xmm9
    mulpd   %xmm5,%xmm10
    mulpd   %xmm8,%xmm11                ## lu*(3-rsq*lu*lu)
    movapd  nb103_half(%rsp),%xmm15
    mulpd   %xmm15,%xmm9                ## first iteration for rinvH1
    mulpd   %xmm15,%xmm10               ## first iteration for rinvH2
    mulpd   %xmm15,%xmm11               ## first iteration for rinvM

    ## second iteration step
    movapd  %xmm9,%xmm2
    movapd  %xmm10,%xmm5
    movapd  %xmm11,%xmm8
    mulpd   %xmm2,%xmm2                 ## lu*lu
    mulpd   %xmm5,%xmm5                 ## lu*lu
    mulpd   %xmm8,%xmm8                 ## lu*lu
    movapd  nb103_three(%rsp),%xmm1
    movapd  %xmm1,%xmm4
    movapd  %xmm1,%xmm7
    mulpd   %xmm0,%xmm2                 ## rsq*lu*lu
    mulpd   %xmm3,%xmm5                 ## rsq*lu*lu
    mulpd   %xmm6,%xmm8                 ## rsq*lu*lu
    subpd   %xmm2,%xmm1
    subpd   %xmm5,%xmm4
    subpd   %xmm8,%xmm7                 ## 3-rsq*lu*lu
    mulpd   %xmm1,%xmm9
    mulpd   %xmm4,%xmm10
    mulpd   %xmm7,%xmm11                ## lu*(3-rsq*lu*lu)
    movapd  nb103_half(%rsp),%xmm15
    mulpd   %xmm15,%xmm9                ## rinvH1
    mulpd   %xmm15,%xmm10               ## rinvH2
    mulpd   %xmm15,%xmm11               ## rinvM

    ## interactions
    movapd  %xmm9,%xmm0                 ## keep rinv copies for vcoul
    movapd  %xmm10,%xmm1
    movapd  %xmm11,%xmm2
    mulpd   %xmm9,%xmm9                 ## rinvsq H1
    mulpd   %xmm10,%xmm10               ## rinvsq H2
    mulpd   %xmm11,%xmm11               ## rinvsq M
    mulpd   nb103_qqH(%rsp),%xmm0       ## vcoul H1 = qqH*rinvH1
    mulpd   nb103_qqH(%rsp),%xmm1       ## vcoul H2 = qqH*rinvH2
    mulpd   nb103_qqM(%rsp),%xmm2       ## vcoul M  = qqM*rinvM
    mulpd   %xmm0,%xmm9                 ## fscal H1 = vcoul*rinvsq
    mulpd   %xmm1,%xmm10                ## fscal H2
    mulpd   %xmm2,%xmm11                ## fscal M

    ## vctot += vcoulH1 + vcoulH2 + vcoulM
    addpd   nb103_vctot(%rsp),%xmm0
    addpd   %xmm2,%xmm1
    addpd   %xmm1,%xmm0
    movapd  %xmm0,nb103_vctot(%rsp)

    ## move j forces to xmm0-xmm2
    movq    nb103_faction(%rbp),%rdi
    movlpd  (%rdi,%rax,8),%xmm0
    movlpd  8(%rdi,%rax,8),%xmm1
    movlpd  16(%rdi,%rax,8),%xmm2
    movhpd  (%rdi,%rbx,8),%xmm0
    movhpd  8(%rdi,%rbx,8),%xmm1
    movhpd  16(%rdi,%rbx,8),%xmm2

    ## replicate each fscal into three registers (x, y, z).
    ## Order matters: xmm11 (fscal M) must be copied out to xmm13-xmm15
    ## before it is overwritten with fscal H2 from xmm10.
    movapd  %xmm9,%xmm7
    movapd  %xmm9,%xmm8
    movapd  %xmm11,%xmm13
    movapd  %xmm11,%xmm14
    movapd  %xmm11,%xmm15
    movapd  %xmm10,%xmm11
    movapd  %xmm10,%xmm12

    ## force vectors = fscal * dr
    mulpd   nb103_dxH1(%rsp),%xmm7
    mulpd   nb103_dyH1(%rsp),%xmm8
    mulpd   nb103_dzH1(%rsp),%xmm9
    mulpd   nb103_dxH2(%rsp),%xmm10
    mulpd   nb103_dyH2(%rsp),%xmm11
    mulpd   nb103_dzH2(%rsp),%xmm12
    mulpd   nb103_dxM(%rsp),%xmm13
    mulpd   nb103_dyM(%rsp),%xmm14
    mulpd   nb103_dzM(%rsp),%xmm15

    ## H1: accumulate into the j forces, then into the i-force slots
    addpd   %xmm7,%xmm0
    addpd   %xmm8,%xmm1
    addpd   %xmm9,%xmm2
    addpd   nb103_fixH1(%rsp),%xmm7
    addpd   nb103_fiyH1(%rsp),%xmm8
    addpd   nb103_fizH1(%rsp),%xmm9

    ## H2
    addpd   %xmm10,%xmm0
    addpd   %xmm11,%xmm1
    addpd   %xmm12,%xmm2
    addpd   nb103_fixH2(%rsp),%xmm10
    addpd   nb103_fiyH2(%rsp),%xmm11
    addpd   nb103_fizH2(%rsp),%xmm12

    ## M
    addpd   %xmm13,%xmm0
    addpd   %xmm14,%xmm1
    addpd   %xmm15,%xmm2
    addpd   nb103_fixM(%rsp),%xmm13
    addpd   nb103_fiyM(%rsp),%xmm14
    addpd   nb103_fizM(%rsp),%xmm15

    ## write the updated i forces back to their stack slots
    movapd  %xmm7,nb103_fixH1(%rsp)
    movapd  %xmm8,nb103_fiyH1(%rsp)
    movapd  %xmm9,nb103_fizH1(%rsp)
    movapd  %xmm10,nb103_fixH2(%rsp)
    movapd  %xmm11,nb103_fiyH2(%rsp)
    movapd  %xmm12,nb103_fizH2(%rsp)
    movapd  %xmm13,nb103_fixM(%rsp)
    movapd  %xmm14,nb103_fiyM(%rsp)
    movapd  %xmm15,nb103_fizM(%rsp)

    ## store back j forces from xmm0-xmm2
    movlpd  %xmm0,(%rdi,%rax,8)
    movlpd  %xmm1,8(%rdi,%rax,8)
    movlpd  %xmm2,16(%rdi,%rax,8)
    movhpd  %xmm0,(%rdi,%rbx,8)
    movhpd  %xmm1,8(%rdi,%rbx,8)
    movhpd  %xmm2,16(%rdi,%rbx,8)

    ## should we do one more iteration?
## ---------------------------------------------------------------------
## Loop control for the unrolled inner loop, followed by the remainder
## path that handles one left-over j atom.
##
## The line breaks of this listing had been lost. As a result
##   * every "##" comment swallowed the instructions that followed it
##     on the same physical line, and
##   * the two label definitions below were glued onto the operand of
##     the preceding jmp, leaving nb103_checksingle / nb103_dosingle
##     undefined and the jump targets wrong.
## One statement per line is restored; the instruction sequence itself
## is unchanged.
## ---------------------------------------------------------------------
    subl    $2,nb103_innerk(%rsp)       ## two j atoms consumed per pass
    jl      _nb_kernel103_x86_64_sse2.nb103_checksingle
    jmp     _nb_kernel103_x86_64_sse2.nb103_unroll_loop

_nb_kernel103_x86_64_sse2.nb103_checksingle:
    ## bit 0 of the remaining count tells whether one j atom is left
    movl    nb103_innerk(%rsp),%edx
    andl    $1,%edx
    jnz     _nb_kernel103_x86_64_sse2.nb103_dosingle
    jmp     _nb_kernel103_x86_64_sse2.nb103_updateouterdata

_nb_kernel103_x86_64_sse2.nb103_dosingle:
    ## Scalar (low-lane) version of the interaction for a single j atom.
    ## Stack slots are still read and written with movapd where the
    ## original did so, which requires them to be 16-byte aligned.
    movq    nb103_innerjjnr(%rsp),%rdx  ## pointer to jjnr[k]
    movl    (%rdx),%eax                 ## jnr (32-bit write zero-extends into rax)

    movq    nb103_charge(%rbp),%rsi     ## base of charge[]
    xorpd   %xmm6,%xmm6                 ## clear so the high lane is zero
    movlpd  (%rsi,%rax,8),%xmm6         ## jq A
    movapd  nb103_iqM(%rsp),%xmm3
    movapd  nb103_iqH(%rsp),%xmm4
    mulsd   %xmm6,%xmm3                 ## qqM
    mulsd   %xmm6,%xmm4                 ## qqH
    movapd  %xmm3,nb103_qqM(%rsp)
    movapd  %xmm4,nb103_qqH(%rsp)

    movq    nb103_pos(%rbp),%rsi        ## base of pos[]
    lea     (%rax,%rax,2),%rax          ## replace jnr with j3

    ## move coordinates to xmm4-xmm6 and xmm0-xmm2
    movlpd  (%rsi,%rax,8),%xmm4
    movlpd  8(%rsi,%rax,8),%xmm5
    movlpd  16(%rsi,%rax,8),%xmm6
    movapd  %xmm4,%xmm0
    movapd  %xmm5,%xmm1
    movapd  %xmm6,%xmm2

    ## calc dr (j coordinate minus i coordinate of M)
    subsd   nb103_ixM(%rsp),%xmm4
    subsd   nb103_iyM(%rsp),%xmm5
    subsd   nb103_izM(%rsp),%xmm6
    ## store dr
    movapd  %xmm4,nb103_dxM(%rsp)
    movapd  %xmm5,nb103_dyM(%rsp)
    movapd  %xmm6,nb103_dzM(%rsp)
    ## square it
    mulsd   %xmm4,%xmm4
    mulsd   %xmm5,%xmm5
    mulsd   %xmm6,%xmm6
    addsd   %xmm5,%xmm4
    addsd   %xmm6,%xmm4
    movapd  %xmm4,%xmm7
    ## rsqM in xmm7

    ## move j coords to xmm4-xmm6
    movapd  %xmm0,%xmm4
    movapd  %xmm1,%xmm5
    movapd  %xmm2,%xmm6
    ## calc dr
    subsd   nb103_ixH1(%rsp),%xmm4
    subsd   nb103_iyH1(%rsp),%xmm5
    subsd   nb103_izH1(%rsp),%xmm6
    ## store dr
    movapd  %xmm4,nb103_dxH1(%rsp)
    movapd  %xmm5,nb103_dyH1(%rsp)
    movapd  %xmm6,nb103_dzH1(%rsp)
    ## square it
    mulsd   %xmm4,%xmm4
    mulsd   %xmm5,%xmm5
    mulsd   %xmm6,%xmm6
    addsd   %xmm5,%xmm6
    addsd   %xmm4,%xmm6
    ## rsqH1 in xmm6

    ## move j coords to xmm3-xmm5
    movapd  %xmm0,%xmm3
    movapd  %xmm1,%xmm4
    movapd  %xmm2,%xmm5
    ## calc dr
    subsd   nb103_ixH2(%rsp),%xmm3
    subsd   nb103_iyH2(%rsp),%xmm4
    subsd   nb103_izH2(%rsp),%xmm5
    ## store dr
    movapd  %xmm3,nb103_dxH2(%rsp)
    movapd  %xmm4,nb103_dyH2(%rsp)
    movapd  %xmm5,nb103_dzH2(%rsp)
    ## square it
    mulsd   %xmm3,%xmm3
    mulsd   %xmm4,%xmm4
    mulsd   %xmm5,%xmm5
    addsd   %xmm4,%xmm5
    addsd   %xmm3,%xmm5
    ## rsqH2 in xmm5, rsqH1 in xmm6, rsqM in xmm7

    ## Each inverse square root below starts from the single-precision
    ## rsqrtss estimate and applies lu = 0.5*lu*(3-rsq*lu*lu) twice.

    ## start with rsqM - put seed in xmm2
    cvtsd2ss %xmm7,%xmm2
    rsqrtss %xmm2,%xmm2
    cvtss2sd %xmm2,%xmm2

    movapd  %xmm2,%xmm3
    mulsd   %xmm2,%xmm2
    movapd  nb103_three(%rsp),%xmm4
    mulsd   %xmm7,%xmm2                 ## rsq*lu*lu
    subsd   %xmm2,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## iter1 (new lu)

    movapd  %xmm4,%xmm3
    mulsd   %xmm4,%xmm4                 ## lu*lu
    mulsd   %xmm4,%xmm7                 ## rsq*lu*lu
    movapd  nb103_three(%rsp),%xmm4
    subsd   %xmm7,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## rinv
    movapd  %xmm4,%xmm7                 ## rinvM in xmm7

    ## rsqH1 - seed in xmm2
    cvtsd2ss %xmm6,%xmm2
    rsqrtss %xmm2,%xmm2
    cvtss2sd %xmm2,%xmm2

    movapd  %xmm2,%xmm3
    mulsd   %xmm2,%xmm2
    movapd  nb103_three(%rsp),%xmm4
    mulsd   %xmm6,%xmm2                 ## rsq*lu*lu
    subsd   %xmm2,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## iter1 (new lu)

    movapd  %xmm4,%xmm3
    mulsd   %xmm4,%xmm4                 ## lu*lu
    mulsd   %xmm4,%xmm6                 ## rsq*lu*lu
    movapd  nb103_three(%rsp),%xmm4
    subsd   %xmm6,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## rinv
    movapd  %xmm4,%xmm6                 ## rinvH1 in xmm6

    ## rsqH2 - seed in xmm2
    cvtsd2ss %xmm5,%xmm2
    rsqrtss %xmm2,%xmm2
    cvtss2sd %xmm2,%xmm2

    movapd  %xmm2,%xmm3
    mulsd   %xmm2,%xmm2
    movapd  nb103_three(%rsp),%xmm4
    mulsd   %xmm5,%xmm2                 ## rsq*lu*lu
    subsd   %xmm2,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## iter1 (new lu)

    movapd  %xmm4,%xmm3
    mulsd   %xmm4,%xmm4                 ## lu*lu
    mulsd   %xmm4,%xmm5                 ## rsq*lu*lu
    movapd  nb103_three(%rsp),%xmm4
    subsd   %xmm5,%xmm4                 ## 3-rsq*lu*lu
    mulsd   %xmm3,%xmm4                 ## lu*(3-rsq*lu*lu)
    mulsd   nb103_half(%rsp),%xmm4      ## rinv
    movapd  %xmm4,%xmm5                 ## rinvH2 in xmm5

    ## do M interactions
    movapd  %xmm7,%xmm4
    mulsd   %xmm4,%xmm4                 ## xmm7=rinv, xmm4=rinvsq
    mulsd   nb103_qqM(%rsp),%xmm7       ## xmm7=vcoul
    mulsd   %xmm7,%xmm4                 ## total fsM in xmm4
    addsd   nb103_vctot(%rsp),%xmm7
    movlpd  %xmm7,nb103_vctot(%rsp)

    movapd  nb103_dxM(%rsp),%xmm0
    movapd  nb103_dyM(%rsp),%xmm1
    movapd  nb103_dzM(%rsp),%xmm2
    mulsd   %xmm4,%xmm0
    mulsd   %xmm4,%xmm1
    mulsd   %xmm4,%xmm2

    ## update M forces
    movapd  nb103_fixM(%rsp),%xmm3
    movapd  nb103_fiyM(%rsp),%xmm4
    movapd  nb103_fizM(%rsp),%xmm7
    addsd   %xmm0,%xmm3
    addsd   %xmm1,%xmm4
    addsd   %xmm2,%xmm7
    movlpd  %xmm3,nb103_fixM(%rsp)
    movlpd  %xmm4,nb103_fiyM(%rsp)
    movlpd  %xmm7,nb103_fizM(%rsp)
    ## update j forces with water M (first term: plain store)
    movlpd  %xmm0,nb103_fjx(%rsp)
    movlpd  %xmm1,nb103_fjy(%rsp)
    movlpd  %xmm2,nb103_fjz(%rsp)

    ## H1 interactions
    movapd  %xmm6,%xmm4
    mulsd   %xmm4,%xmm4                 ## xmm6=rinv, xmm4=rinvsq
    mulsd   nb103_qqH(%rsp),%xmm6       ## xmm6=vcoul
    mulsd   %xmm6,%xmm4                 ## total fsH1 in xmm4
    addsd   nb103_vctot(%rsp),%xmm6

    movapd  nb103_dxH1(%rsp),%xmm0
    movapd  nb103_dyH1(%rsp),%xmm1
    movapd  nb103_dzH1(%rsp),%xmm2
    movlpd  %xmm6,nb103_vctot(%rsp)
    mulsd   %xmm4,%xmm0
    mulsd   %xmm4,%xmm1
    mulsd   %xmm4,%xmm2

    ## update H1 forces
    movapd  nb103_fixH1(%rsp),%xmm3
    movapd  nb103_fiyH1(%rsp),%xmm4
    movapd  nb103_fizH1(%rsp),%xmm7
    addsd   %xmm0,%xmm3
    addsd   %xmm1,%xmm4
    addsd   %xmm2,%xmm7
    movlpd  %xmm3,nb103_fixH1(%rsp)
    movlpd  %xmm4,nb103_fiyH1(%rsp)
    movlpd  %xmm7,nb103_fizH1(%rsp)
    ## update j forces with water H1
    addsd   nb103_fjx(%rsp),%xmm0
    addsd   nb103_fjy(%rsp),%xmm1
    addsd   nb103_fjz(%rsp),%xmm2
    movsd   %xmm0,nb103_fjx(%rsp)
    movsd   %xmm1,nb103_fjy(%rsp)
    movsd   %xmm2,nb103_fjz(%rsp)

    ## H2 interactions
    movapd  %xmm5,%xmm4
    mulsd   %xmm4,%xmm4                 ## xmm5=rinv, xmm4=rinvsq
    mulsd   nb103_qqH(%rsp),%xmm5       ## xmm5=vcoul
    mulsd   %xmm5,%xmm4                 ## total fsH2 in xmm4
    addsd   nb103_vctot(%rsp),%xmm5

    movapd  nb103_dxH2(%rsp),%xmm0
    movapd  nb103_dyH2(%rsp),%xmm1
    movapd  nb103_dzH2(%rsp),%xmm2
    movlpd  %xmm5,nb103_vctot(%rsp)
    mulsd   %xmm4,%xmm0
    mulsd   %xmm4,%xmm1
    mulsd   %xmm4,%xmm2

    ## update H2 forces
    movapd  nb103_fixH2(%rsp),%xmm3
    movapd  nb103_fiyH2(%rsp),%xmm4
    movapd  nb103_fizH2(%rsp),%xmm7
    addsd   %xmm0,%xmm3
    addsd   %xmm1,%xmm4
    addsd   %xmm2,%xmm7
    movlpd  %xmm3,nb103_fixH2(%rsp)
    movlpd  %xmm4,nb103_fiyH2(%rsp)
    movlpd  %xmm7,nb103_fizH2(%rsp)

    movq    nb103_faction(%rbp),%rdi
    ## update j forces: add the accumulated M+H1 terms to the H2 term,
    ## then add the sum to faction[] at offset j3 (still in rax)
    addsd   nb103_fjx(%rsp),%xmm0
    addsd   nb103_fjy(%rsp),%xmm1
    addsd   nb103_fjz(%rsp),%xmm2
    movlpd  (%rdi,%rax,8),%xmm3
    movlpd  8(%rdi,%rax,8),%xmm4
    movlpd  16(%rdi,%rax,8),%xmm5
    addsd   %xmm0,%xmm3
    addsd   %xmm1,%xmm4
    addsd   %xmm2,%xmm5
    movlpd  %xmm3,(%rdi,%rax,8)
    movlpd  %xmm4,8(%rdi,%rax,8)
    movlpd  %xmm5,16(%rdi,%rax,8)

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?