nb_kernel102_ia32_sse.intel_syntax.s

来自「最著名最快的分子模拟软件」· S 代码 · 共 2,198 行 · 第 1/5 页

S
2,198
字号
	movss  xmm3, [esi + ecx*4 + 8]	movss  xmm4, [esi + ecx*4 + 20]	movss  xmm5, [esi + ecx*4 + 32]	movhps xmm0, [esi + ebx*4 + 4]	movhps xmm1, [esi + ebx*4 + 16]	movhps xmm2, [esi + ebx*4 + 28]		movhps xmm3, [esi + edx*4 + 4]	movhps xmm4, [esi + edx*4 + 16]	movhps xmm5, [esi + edx*4 + 28]		shufps xmm0, xmm3, 204  ;# constant 11001100	shufps xmm1, xmm4, 204  ;# constant 11001100	shufps xmm2, xmm5, 204  ;# constant 11001100	movaps [esp + nb102_jzO],  xmm0	movaps [esp + nb102_jzH1],  xmm1	movaps [esp + nb102_jzH2],  xmm2	movaps xmm0, [esp + nb102_ixO]	movaps xmm1, [esp + nb102_iyO]	movaps xmm2, [esp + nb102_izO]	movaps xmm3, [esp + nb102_ixO]	movaps xmm4, [esp + nb102_iyO]	movaps xmm5, [esp + nb102_izO]	subps  xmm0, [esp + nb102_jxO]	subps  xmm1, [esp + nb102_jyO]	subps  xmm2, [esp + nb102_jzO]	subps  xmm3, [esp + nb102_jxH1]	subps  xmm4, [esp + nb102_jyH1]	subps  xmm5, [esp + nb102_jzH1]	movaps [esp + nb102_dxOO], xmm0	movaps [esp + nb102_dyOO], xmm1	movaps [esp + nb102_dzOO], xmm2	mulps  xmm0, xmm0	mulps  xmm1, xmm1	mulps  xmm2, xmm2	movaps [esp + nb102_dxOH1], xmm3	movaps [esp + nb102_dyOH1], xmm4	movaps [esp + nb102_dzOH1], xmm5	mulps  xmm3, xmm3	mulps  xmm4, xmm4	mulps  xmm5, xmm5	addps  xmm0, xmm1	addps  xmm0, xmm2	addps  xmm3, xmm4	addps  xmm3, xmm5	movaps [esp + nb102_rsqOO], xmm0	movaps [esp + nb102_rsqOH1], xmm3	movaps xmm0, [esp + nb102_ixO]	movaps xmm1, [esp + nb102_iyO]	movaps xmm2, [esp + nb102_izO]	movaps xmm3, [esp + nb102_ixH1]	movaps xmm4, [esp + nb102_iyH1]	movaps xmm5, [esp + nb102_izH1]	subps  xmm0, [esp + nb102_jxH2]	subps  xmm1, [esp + nb102_jyH2]	subps  xmm2, [esp + nb102_jzH2]	subps  xmm3, [esp + nb102_jxO]	subps  xmm4, [esp + nb102_jyO]	subps  xmm5, [esp + nb102_jzO]	movaps [esp + nb102_dxOH2], xmm0	movaps [esp + nb102_dyOH2], xmm1	movaps [esp + nb102_dzOH2], xmm2	mulps  xmm0, xmm0	mulps  xmm1, xmm1	mulps  xmm2, xmm2	movaps [esp + nb102_dxH1O], xmm3	movaps [esp + nb102_dyH1O], xmm4	movaps [esp + nb102_dzH1O], xmm5	mulps  xmm3, xmm3	mulps  xmm4, 
xmm4	mulps  xmm5, xmm5	addps  xmm0, xmm1	addps  xmm0, xmm2	addps  xmm3, xmm4	addps  xmm3, xmm5	movaps [esp + nb102_rsqOH2], xmm0	movaps [esp + nb102_rsqH1O], xmm3	movaps xmm0, [esp + nb102_ixH1]	movaps xmm1, [esp + nb102_iyH1]	movaps xmm2, [esp + nb102_izH1]	movaps xmm3, [esp + nb102_ixH1]	movaps xmm4, [esp + nb102_iyH1]	movaps xmm5, [esp + nb102_izH1]	subps  xmm0, [esp + nb102_jxH1]	subps  xmm1, [esp + nb102_jyH1]	subps  xmm2, [esp + nb102_jzH1]	subps  xmm3, [esp + nb102_jxH2]	subps  xmm4, [esp + nb102_jyH2]	subps  xmm5, [esp + nb102_jzH2]	movaps [esp + nb102_dxH1H1], xmm0	movaps [esp + nb102_dyH1H1], xmm1	movaps [esp + nb102_dzH1H1], xmm2	mulps  xmm0, xmm0	mulps  xmm1, xmm1	mulps  xmm2, xmm2	movaps [esp + nb102_dxH1H2], xmm3	movaps [esp + nb102_dyH1H2], xmm4	movaps [esp + nb102_dzH1H2], xmm5	mulps  xmm3, xmm3	mulps  xmm4, xmm4	mulps  xmm5, xmm5	addps  xmm0, xmm1	addps  xmm0, xmm2	addps  xmm3, xmm4	addps  xmm3, xmm5	movaps [esp + nb102_rsqH1H1], xmm0	movaps [esp + nb102_rsqH1H2], xmm3	movaps xmm0, [esp + nb102_ixH2]	movaps xmm1, [esp + nb102_iyH2]	movaps xmm2, [esp + nb102_izH2]	movaps xmm3, [esp + nb102_ixH2]	movaps xmm4, [esp + nb102_iyH2]	movaps xmm5, [esp + nb102_izH2]	subps  xmm0, [esp + nb102_jxO]	subps  xmm1, [esp + nb102_jyO]	subps  xmm2, [esp + nb102_jzO]	subps  xmm3, [esp + nb102_jxH1]	subps  xmm4, [esp + nb102_jyH1]	subps  xmm5, [esp + nb102_jzH1]	movaps [esp + nb102_dxH2O], xmm0	movaps [esp + nb102_dyH2O], xmm1	movaps [esp + nb102_dzH2O], xmm2	mulps  xmm0, xmm0	mulps  xmm1, xmm1	mulps  xmm2, xmm2	movaps [esp + nb102_dxH2H1], xmm3	movaps [esp + nb102_dyH2H1], xmm4	movaps [esp + nb102_dzH2H1], xmm5	mulps  xmm3, xmm3	mulps  xmm4, xmm4	mulps  xmm5, xmm5	addps  xmm0, xmm1	addps  xmm0, xmm2	addps  xmm4, xmm3	addps  xmm4, xmm5	movaps [esp + nb102_rsqH2O], xmm0	movaps [esp + nb102_rsqH2H1], xmm4	movaps xmm0, [esp + nb102_ixH2]	movaps xmm1, [esp + nb102_iyH2]	movaps xmm2, [esp + nb102_izH2]	subps  xmm0, [esp + nb102_jxH2]	subps  xmm1, [esp + nb102_jyH2]	subps  
xmm2, [esp + nb102_jzH2]	movaps [esp + nb102_dxH2H2], xmm0	movaps [esp + nb102_dyH2H2], xmm1	movaps [esp + nb102_dzH2H2], xmm2	mulps xmm0, xmm0	mulps xmm1, xmm1	mulps xmm2, xmm2	addps xmm0, xmm1	addps xmm0, xmm2	movaps [esp + nb102_rsqH2H2], xmm0			;# start doing invsqrt use rsq values in xmm0, xmm4 	rsqrtps xmm1, xmm0	rsqrtps xmm5, xmm4	movaps  xmm2, xmm1	movaps  xmm6, xmm5	mulps   xmm1, xmm1	mulps   xmm5, xmm5	movaps  xmm3, [esp + nb102_three]	movaps  xmm7, xmm3	mulps   xmm1, xmm0	mulps   xmm5, xmm4	subps   xmm3, xmm1	subps   xmm7, xmm5	mulps   xmm3, xmm2	mulps   xmm7, xmm6	mulps   xmm3, [esp + nb102_half] ;# rinvH2H2 	mulps   xmm7, [esp + nb102_half] ;# rinvH2H1 	movaps  [esp + nb102_rinvH2H2], xmm3	movaps  [esp + nb102_rinvH2H1], xmm7		rsqrtps xmm1, [esp + nb102_rsqOO]	rsqrtps xmm5, [esp + nb102_rsqOH1]	movaps  xmm2, xmm1	movaps  xmm6, xmm5	mulps   xmm1, xmm1	mulps   xmm5, xmm5	movaps  xmm3, [esp + nb102_three]	movaps  xmm7, xmm3	mulps   xmm1, [esp + nb102_rsqOO]	mulps   xmm5, [esp + nb102_rsqOH1]	subps   xmm3, xmm1	subps   xmm7, xmm5	mulps   xmm3, xmm2	mulps   xmm7, xmm6	mulps   xmm3, [esp + nb102_half] 	mulps   xmm7, [esp + nb102_half]	movaps  [esp + nb102_rinvOO], xmm3	movaps  [esp + nb102_rinvOH1], xmm7		rsqrtps xmm1, [esp + nb102_rsqOH2]	rsqrtps xmm5, [esp + nb102_rsqH1O]	movaps  xmm2, xmm1	movaps  xmm6, xmm5	mulps   xmm1, xmm1	mulps   xmm5, xmm5	movaps  xmm3, [esp + nb102_three]	movaps  xmm7, xmm3	mulps   xmm1, [esp + nb102_rsqOH2]	mulps   xmm5, [esp + nb102_rsqH1O]	subps   xmm3, xmm1	subps   xmm7, xmm5	mulps   xmm3, xmm2	mulps   xmm7, xmm6	mulps   xmm3, [esp + nb102_half] 	mulps   xmm7, [esp + nb102_half]	movaps  [esp + nb102_rinvOH2], xmm3	movaps  [esp + nb102_rinvH1O], xmm7		rsqrtps xmm1, [esp + nb102_rsqH1H1]	rsqrtps xmm5, [esp + nb102_rsqH1H2]	movaps  xmm2, xmm1	movaps  xmm6, xmm5	mulps   xmm1, xmm1	mulps   xmm5, xmm5	movaps  xmm3, [esp + nb102_three]	movaps  xmm7, xmm3	mulps   xmm1, [esp + nb102_rsqH1H1]	mulps   xmm5, [esp + nb102_rsqH1H2]	subps   
xmm3, xmm1	subps   xmm7, xmm5	mulps   xmm3, xmm2	mulps   xmm7, xmm6	mulps   xmm3, [esp + nb102_half] 	mulps   xmm7, [esp + nb102_half]	movaps  [esp + nb102_rinvH1H1], xmm3	movaps  [esp + nb102_rinvH1H2], xmm7		rsqrtps xmm1, [esp + nb102_rsqH2O]	movaps  xmm2, xmm1	mulps   xmm1, xmm1	movaps  xmm3, [esp + nb102_three]	mulps   xmm1, [esp + nb102_rsqH2O]	subps   xmm3, xmm1	mulps   xmm3, xmm2	mulps   xmm3, [esp + nb102_half] 	movaps  [esp + nb102_rinvH2O], xmm3	;# start with OO interaction 	movaps xmm0, [esp + nb102_rinvOO]	movaps xmm7, xmm0	mulps  xmm0, xmm0	mulps  xmm7, [esp + nb102_qqOO]	mulps  xmm0, xmm7		addps  xmm7, [esp + nb102_vctot] 	movaps xmm1, xmm0	movaps xmm2, xmm0	xorps xmm3, xmm3	movaps xmm4, xmm3	movaps xmm5, xmm3	mulps xmm0, [esp + nb102_dxOO]	mulps xmm1, [esp + nb102_dyOO]	mulps xmm2, [esp + nb102_dzOO]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixO]	addps xmm1, [esp + nb102_fiyO]	addps xmm2, [esp + nb102_fizO]	movaps [esp + nb102_fjxO], xmm3	movaps [esp + nb102_fjyO], xmm4	movaps [esp + nb102_fjzO], xmm5	movaps [esp + nb102_fixO], xmm0	movaps [esp + nb102_fiyO], xmm1	movaps [esp + nb102_fizO], xmm2	;# O-H1 interaction 	movaps xmm0, [esp + nb102_rinvOH1]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + nb102_qqOH]	mulps xmm0, xmm1	;# fsOH1  	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0		xorps xmm3, xmm3	movaps xmm4, xmm3	movaps xmm5, xmm3	mulps xmm0, [esp + nb102_dxOH1]	mulps xmm1, [esp + nb102_dyOH1]	mulps xmm2, [esp + nb102_dzOH1]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixO]	addps xmm1, [esp + nb102_fiyO]	addps xmm2, [esp + nb102_fizO]	movaps [esp + nb102_fjxH1], xmm3	movaps [esp + nb102_fjyH1], xmm4	movaps [esp + nb102_fjzH1], xmm5	movaps [esp + nb102_fixO], xmm0	movaps [esp + nb102_fiyO], xmm1	movaps [esp + nb102_fizO], xmm2	;# O-H2 interaction  	movaps xmm0, [esp + nb102_rinvOH2]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + 
nb102_qqOH]	mulps xmm0, xmm1	;# fsOH2  	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0		xorps xmm3, xmm3	movaps xmm4, xmm3	movaps xmm5, xmm3	mulps xmm0, [esp + nb102_dxOH2]	mulps xmm1, [esp + nb102_dyOH2]	mulps xmm2, [esp + nb102_dzOH2]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixO]	addps xmm1, [esp + nb102_fiyO]	addps xmm2, [esp + nb102_fizO]	movaps [esp + nb102_fjxH2], xmm3	movaps [esp + nb102_fjyH2], xmm4	movaps [esp + nb102_fjzH2], xmm5	movaps [esp + nb102_fixO], xmm0	movaps [esp + nb102_fiyO], xmm1	movaps [esp + nb102_fizO], xmm2	;# H1-O interaction 	movaps xmm0, [esp + nb102_rinvH1O]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + nb102_qqOH]	mulps xmm0, xmm1	;# fsH1O 	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0	movaps xmm3, [esp + nb102_fjxO]	movaps xmm4, [esp + nb102_fjyO]	movaps xmm5, [esp + nb102_fjzO]	mulps xmm0, [esp + nb102_dxH1O]	mulps xmm1, [esp + nb102_dyH1O]	mulps xmm2, [esp + nb102_dzH1O]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixH1]	addps xmm1, [esp + nb102_fiyH1]	addps xmm2, [esp + nb102_fizH1]	movaps [esp + nb102_fjxO], xmm3	movaps [esp + nb102_fjyO], xmm4	movaps [esp + nb102_fjzO], xmm5	movaps [esp + nb102_fixH1], xmm0	movaps [esp + nb102_fiyH1], xmm1	movaps [esp + nb102_fizH1], xmm2	;# H1-H1 interaction 	movaps xmm0, [esp + nb102_rinvH1H1]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + nb102_qqHH]	mulps xmm0, xmm1	;# fsH1H1 	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0	movaps xmm3, [esp + nb102_fjxH1]	movaps xmm4, [esp + nb102_fjyH1]	movaps xmm5, [esp + nb102_fjzH1]	mulps xmm0, [esp + nb102_dxH1H1]	mulps xmm1, [esp + nb102_dyH1H1]	mulps xmm2, [esp + nb102_dzH1H1]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixH1]	addps xmm1, [esp + nb102_fiyH1]	addps xmm2, [esp + nb102_fizH1]	movaps [esp + nb102_fjxH1], xmm3	movaps [esp + nb102_fjyH1], 
xmm4	movaps [esp + nb102_fjzH1], xmm5	movaps [esp + nb102_fixH1], xmm0	movaps [esp + nb102_fiyH1], xmm1	movaps [esp + nb102_fizH1], xmm2	;# H1-H2 interaction 	movaps xmm0, [esp + nb102_rinvH1H2]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + nb102_qqHH]	mulps xmm0, xmm1	;# fsOH2  	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0	movaps xmm3, [esp + nb102_fjxH2]	movaps xmm4, [esp + nb102_fjyH2]	movaps xmm5, [esp + nb102_fjzH2]	mulps xmm0, [esp + nb102_dxH1H2]	mulps xmm1, [esp + nb102_dyH1H2]	mulps xmm2, [esp + nb102_dzH1H2]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixH1]	addps xmm1, [esp + nb102_fiyH1]	addps xmm2, [esp + nb102_fizH1]	movaps [esp + nb102_fjxH2], xmm3	movaps [esp + nb102_fjyH2], xmm4	movaps [esp + nb102_fjzH2], xmm5	movaps [esp + nb102_fixH1], xmm0	movaps [esp + nb102_fiyH1], xmm1	movaps [esp + nb102_fizH1], xmm2	;# H2-O interaction 	movaps xmm0, [esp + nb102_rinvH2O]	movaps xmm1, xmm0	mulps xmm0, xmm0	mulps xmm1, [esp + nb102_qqOH]	mulps xmm0, xmm1	;# fsH2O 	addps xmm7, xmm1	;# add to local vctot 	movaps xmm1, xmm0	movaps xmm2, xmm0	movaps xmm3, [esp + nb102_fjxO]	movaps xmm4, [esp + nb102_fjyO]	movaps xmm5, [esp + nb102_fjzO]	mulps xmm0, [esp + nb102_dxH2O]	mulps xmm1, [esp + nb102_dyH2O]	mulps xmm2, [esp + nb102_dzH2O]	subps xmm3, xmm0	subps xmm4, xmm1	subps xmm5, xmm2	addps xmm0, [esp + nb102_fixH2]	addps xmm1, [esp + nb102_fiyH2]	addps xmm2, [esp + nb102_fizH2]	movaps [esp + nb102_fjxO], xmm3

⌨️ 快捷键说明

复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?