⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nb_kernel112_ia32_3dnow.intel_syntax.s

📁 最著名最快的分子模拟软件
💻 S
📖 第 1 页 / 共 3 页
字号:
		;# ---- tail of the j-atom interaction that started above this point ----
	pfadd     mm0, mm5		;# add up total force on j particle.
	pfadd     mm1, mm7

	;# update j particle force: subtract the summed force (mm0 = two floats, mm1 = one)
	movq      mm2, [edi + eax*4]
	movd      mm3, [edi + eax*4 + 8]
	pfsub     mm2, mm0
	pfsub     mm3, mm1
	movq      [edi + eax*4], mm2
	movd      [edi + eax*4 + 8], mm3

	;# ---- interactions with j H1 (three floats at byte offsets 12/16/20) ----
	movq      mm0, [esi + eax*4 + 12]
	movd      mm1, [esi + eax*4 + 20]

	;# copy & expand to mm2-mm4 for the H interactions
	movq      mm2, mm0
	movq      mm3, mm0
	movq      mm4, mm1
	punpckldq mm2, mm2		;# low dword of mm0 duplicated into both halves
	punpckhdq mm3, mm3		;# high dword of mm0 duplicated into both halves
	punpckldq mm4, mm4		;# low dword of mm1 duplicated into both halves

	movd      mm6, [esp + nb112_qqOH]	;# low half only (movd)
	movq      mm7, [esp + nb112_qqHH]	;# both halves

	;# pfsubr is a reversed subtract: destination = memory operand - destination
	pfsubr    mm0, [esp + nb112_ixO]
	pfsubr    mm1, [esp + nb112_izO]

	movq      [esp + nb112_dxO], mm0
	pfmul     mm0, mm0
	movd      [esp + nb112_dzO], mm1
	pfmul     mm1, mm1
	pfacc     mm0, mm1		;# horizontal add of the squared components
	pfadd     mm0, mm1		;# mm0=rsqO

	;# same duplication as above; repeating it leaves the registers unchanged
	punpckldq mm2, mm2
	punpckldq mm3, mm3
	punpckldq mm4, mm4		;# mm2-mm4 is jx-jz
	pfsubr    mm2, [esp + nb112_ixH]
	pfsubr    mm3, [esp + nb112_iyH]
	pfsubr    mm4, [esp + nb112_izH]	;# mm2-mm4 is dxH-dzH

	movq      [esp + nb112_dxH], mm2
	movq      [esp + nb112_dyH], mm3
	movq      [esp + nb112_dzH], mm4
	pfmul     mm2, mm2
	pfmul     mm3, mm3
	pfmul     mm4, mm4
	pfadd     mm3, mm2
	pfadd     mm3, mm4		;# mm3=rsqH

	;# 1/sqrt(rsqO): hardware estimate followed by the pfrsqit1/pfrcpit2 refinement
	pfrsqrt   mm1, mm0
	movq      mm2, mm1
	pfmul     mm1, mm1
	pfrsqit1  mm1, mm0
	pfrcpit2  mm1, mm2		;# mm1=invsqrt
	movq      mm4, mm1
	pfmul     mm4, mm4		;# mm4=invsq

	;# calculate potential and scalar force
	pfmul     mm6, mm1		;# mm6=vcoul
	pfmul     mm4, mm6		;# mm4=fscalar

	;# 1/sqrt(rsqH) for both halves: pfrsqrt only reads the low element,
	;# so the halves are swapped to seed the second one
	pfrsqrt   mm5, mm3
	pswapd    mm3, mm3
	pfrsqrt   mm2, mm3
	pswapd    mm3, mm3
	punpckldq mm5, mm2		;# seeds are in mm5 now, and rsq in mm3
	movq      mm2, mm5
	pfmul     mm5, mm5
	pfrsqit1  mm5, mm3
	pfrcpit2  mm5, mm2		;# mm5=invsqrt
	movq      mm3, mm5
	pfmul     mm3, mm3		;# mm3=invsq
	pfmul     mm7, mm5		;# mm7=vcoul
	pfmul     mm3, mm7		;# mm3=fscal for the two H's.

	;# update vctot
	pfadd     mm7, mm6
	pfadd     mm7, [esp + nb112_vctot]
	movq      [esp + nb112_vctot], mm7

	;# spread oxygen fscalar to both positions
	punpckldq mm4, mm4

	;# calc vectorial force for O
	movq      mm0, [esp + nb112_dxO]
	movd      mm1, [esp + nb112_dzO]
	pfmul     mm0, mm4
	pfmul     mm1, mm4

	;# calc vectorial force for H's
	movq      mm5, [esp + nb112_dxH]
	movq      mm6, [esp + nb112_dyH]
	movq      mm7, [esp + nb112_dzH]
	pfmul     mm5, mm3
	pfmul     mm6, mm3
	pfmul     mm7, mm3

	;# update iO particle force
	movq      mm2, [esp + nb112_fixO]
	movd      mm3, [esp + nb112_fizO]
	pfadd     mm2, mm0
	pfadd     mm3, mm1
	movq      [esp + nb112_fixO], mm2
	movd      [esp + nb112_fizO], mm3

	;# update iH forces
	movq      mm2, [esp + nb112_fixH]
	movq      mm3, [esp + nb112_fiyH]
	movq      mm4, [esp + nb112_fizH]
	pfadd     mm2, mm5
	pfadd     mm3, mm6
	pfadd     mm4, mm7
	movq      [esp + nb112_fixH], mm2
	movq      [esp + nb112_fiyH], mm3
	movq      [esp + nb112_fizH], mm4

	;# pack j forces from H in the same form as the oxygen force.
	pfacc     mm5, mm6		;# mm5(l)=fjx(H1+ h2) mm5(h)=fjy(H1+ h2)
	pfacc     mm7, mm7		;# mm7(l)=fjz(H1+ h2)

	pfadd     mm0, mm5		;# add up total force on j particle.
;# ---- finish the j H1 interaction: third force component, then store ----
	pfadd     mm1, mm7

	;# update j particle force (three floats at byte offsets 12/16/20)
	movq      mm2, [edi + eax*4 + 12]
	movd      mm3, [edi + eax*4 + 20]
	pfsub     mm2, mm0
	pfsub     mm3, mm1
	movq      [edi + eax*4 + 12], mm2
	movd      [edi + eax*4 + 20], mm3

	;# ---- interactions with j H2 (three floats at byte offsets 24/28/32) ----
	movq      mm0, [esi + eax*4 + 24]
	movd      mm1, [esi + eax*4 + 32]

	;# copy & expand to mm2-mm4 for the H interactions
	movq      mm2, mm0
	movq      mm3, mm0
	movq      mm4, mm1
	punpckldq mm2, mm2		;# low dword of mm0 duplicated into both halves
	punpckhdq mm3, mm3		;# high dword of mm0 duplicated into both halves
	punpckldq mm4, mm4		;# low dword of mm1 duplicated into both halves

	movd      mm6, [esp + nb112_qqOH]	;# low half only (movd)
	movq      mm7, [esp + nb112_qqHH]	;# both halves

	;# pfsubr is a reversed subtract: destination = memory operand - destination
	pfsubr    mm0, [esp + nb112_ixO]
	pfsubr    mm1, [esp + nb112_izO]

	movq      [esp + nb112_dxO], mm0
	pfmul     mm0, mm0
	movd      [esp + nb112_dzO], mm1
	pfmul     mm1, mm1
	pfacc     mm0, mm1		;# horizontal add of the squared components
	pfadd     mm0, mm1		;# mm0=rsqO

	;# same duplication as above; repeating it leaves the registers unchanged
	punpckldq mm2, mm2
	punpckldq mm3, mm3
	punpckldq mm4, mm4		;# mm2-mm4 is jx-jz
	pfsubr    mm2, [esp + nb112_ixH]
	pfsubr    mm3, [esp + nb112_iyH]
	pfsubr    mm4, [esp + nb112_izH]	;# mm2-mm4 is dxH-dzH

	movq      [esp + nb112_dxH], mm2
	movq      [esp + nb112_dyH], mm3
	movq      [esp + nb112_dzH], mm4
	pfmul     mm2, mm2
	pfmul     mm3, mm3
	pfmul     mm4, mm4
	pfadd     mm3, mm2
	pfadd     mm3, mm4		;# mm3=rsqH

	;# 1/sqrt(rsqO): hardware estimate followed by the pfrsqit1/pfrcpit2 refinement
	pfrsqrt   mm1, mm0
	movq      mm2, mm1
	pfmul     mm1, mm1
	pfrsqit1  mm1, mm0
	pfrcpit2  mm1, mm2		;# mm1=invsqrt
	movq      mm4, mm1
	pfmul     mm4, mm4		;# mm4=invsq

	;# calculate potential and scalar force
	pfmul     mm6, mm1		;# mm6=vcoul
	pfmul     mm4, mm6		;# mm4=fscalar

	;# 1/sqrt(rsqH) for both halves: pfrsqrt only reads the low element,
	;# so the halves are swapped to seed the second one
	pfrsqrt   mm5, mm3
	pswapd    mm3, mm3
	pfrsqrt   mm2, mm3
	pswapd    mm3, mm3
	punpckldq mm5, mm2		;# seeds are in mm5 now, and rsq in mm3.
	movq      mm2, mm5
	pfmul     mm5, mm5
	pfrsqit1  mm5, mm3
	pfrcpit2  mm5, mm2		;# mm5=invsqrt
	movq      mm3, mm5
	pfmul     mm3, mm3		;# mm3=invsq
	pfmul     mm7, mm5		;# mm7=vcoul
	pfmul     mm3, mm7		;# mm3=fscal for the two H's.

	;# update vctot
	pfadd     mm7, mm6
	pfadd     mm7, [esp + nb112_vctot]
	movq      [esp + nb112_vctot], mm7

	;# spread oxygen fscalar to both positions
	punpckldq mm4, mm4

	;# calc vectorial force for O
	movq      mm0, [esp + nb112_dxO]
	movd      mm1, [esp + nb112_dzO]
	pfmul     mm0, mm4
	pfmul     mm1, mm4

	;# calc vectorial force for H's
	movq      mm5, [esp + nb112_dxH]
	movq      mm6, [esp + nb112_dyH]
	movq      mm7, [esp + nb112_dzH]
	pfmul     mm5, mm3
	pfmul     mm6, mm3
	pfmul     mm7, mm3

	;# update iO particle force
	movq      mm2, [esp + nb112_fixO]
	movd      mm3, [esp + nb112_fizO]
	pfadd     mm2, mm0
	pfadd     mm3, mm1
	movq      [esp + nb112_fixO], mm2
	movd      [esp + nb112_fizO], mm3

	;# update iH forces
	movq      mm2, [esp + nb112_fixH]
	movq      mm3, [esp + nb112_fiyH]
	movq      mm4, [esp + nb112_fizH]
	pfadd     mm2, mm5
	pfadd     mm3, mm6
	pfadd     mm4, mm7
	movq      [esp + nb112_fixH], mm2
	movq      [esp + nb112_fiyH], mm3
	movq      [esp + nb112_fizH], mm4

	;# pack j forces from H in the same form as the oxygen force.
	pfacc     mm5, mm6		;# mm5(l)=fjx(H1+ h2) mm5(h)=fjy(H1+ h2)
	pfacc     mm7, mm7		;# mm7(l)=fjz(H1+ h2)

	pfadd     mm0, mm5		;# add up total force on j particle.
	pfadd     mm1, mm7

	;# update j particle force (three floats at byte offsets 24/28/32)
	movq      mm2, [edi + eax*4 + 24]
	movd      mm3, [edi + eax*4 + 32]
	pfsub     mm2, mm0
	pfsub     mm3, mm1
	movq      [edi + eax*4 + 24], mm2
	movd      [edi + eax*4 + 32], mm3

	;#  done  - one more?
;# ---- inner-loop tail: count down innerk and repeat until it reaches zero ----
	dec       dword ptr [esp + nb112_innerk]
	jz        .nb112_updateouterdata
	jmp       .nb112_inner_loop

;# ---- add the forces and potentials accumulated on the stack to the arrays ----
.nb112_updateouterdata:
	mov       ecx, [esp + nb112_ii3]

	movq      mm6, [edi + ecx*4]		;# increment iO force
	movd      mm7, [edi + ecx*4 + 8]
	pfadd     mm6, [esp + nb112_fixO]
	pfadd     mm7, [esp + nb112_fizO]
	movq      [edi + ecx*4], mm6
	movd      [edi + ecx*4 + 8], mm7

	;# fixH/fiyH/fizH each hold one component for both hydrogens;
	;# regroup them into per-hydrogen (x,y) pairs and z scalars
	movq      mm0, [esp + nb112_fixH]
	movq      mm3, [esp + nb112_fiyH]
	movq      mm1, [esp + nb112_fizH]
	movq      mm2, mm0
	punpckldq mm0, mm3		;# mm0(l)=fxH1, mm0(h)=fyH1
	punpckhdq mm2, mm3		;# mm2(l)=fxH2, mm2(h)=fyH2
	movq      mm3, mm1
	pswapd    mm3, mm3		;# swap halves so the H2 z value is in the low dword
	;# mm1 is fzH1
	;# mm3 is fzH2

	movq      mm6, [edi + ecx*4 + 12]	;# increment iH1 force
	movd      mm7, [edi + ecx*4 + 20]
	pfadd     mm6, mm0
	pfadd     mm7, mm1
	movq      [edi + ecx*4 + 12], mm6
	movd      [edi + ecx*4 + 20], mm7

	movq      mm6, [edi + ecx*4 + 24]	;# increment iH2 force
	movd      mm7, [edi + ecx*4 + 32]
	pfadd     mm6, mm2
	pfadd     mm7, mm3
	movq      [edi + ecx*4 + 24], mm6
	movd      [edi + ecx*4 + 32], mm7

	;# the shift force receives the sum of the O, H1 and H2 forces
	mov       ebx, [ebp + nb112_fshift]	;# increment fshift force
	mov       edx, [esp + nb112_is3]
	movq      mm6, [ebx + edx*4]
	movd      mm7, [ebx + edx*4 + 8]
	pfadd     mm6, [esp + nb112_fixO]
	pfadd     mm7, [esp + nb112_fizO]
	pfadd     mm6, mm0
	pfadd     mm7, mm1
	pfadd     mm6, mm2
	pfadd     mm7, mm3
	movq      [ebx + edx*4], mm6
	movd      [ebx + edx*4 + 8], mm7

	;# get n from stack
	mov       esi, [esp + nb112_n]
	;# get group index for i particle
	mov       edx, [ebp + nb112_gid]	;# base of gid[]
	mov       edx, [edx + esi*4]		;# ggid=gid[n]

	movq      mm7, [esp + nb112_vctot]
	pfacc     mm7, mm7		;# get and sum the two parts of total potential

	mov       eax, [ebp + nb112_Vc]
	movd      mm6, [eax + edx*4]
	pfadd     mm6, mm7
	movd      [eax + edx*4], mm6	;# increment vc[gid]

	movq      mm7, [esp + nb112_Vvdwtot]
	pfacc     mm7, mm7		;# get and sum the two parts of total potential

	mov       eax, [ebp + nb112_Vvdw]
	movd      mm6, [eax + edx*4]
	pfadd     mm6, mm7
	movd      [eax + edx*4], mm6	;# increment Vvdwtot[gid]

	;# finish if last
   ;# ---- outer-loop control: ecx = nn1 - (n + 1); zero means this chunk is done ----
	mov       ecx, [esp + nb112_nn1]
	;# esi already loaded with n
	inc       esi
	sub       ecx, esi
	jecxz     .nb112_outerend
	;# not last, iterate outer loop once more!
	mov       [esp + nb112_n], esi
	jmp       .nb112_outer

.nb112_outerend:
	;# check if more outer neighborlists remain
	mov       ecx, [esp + nb112_nri]
	;# esi already loaded with n above
	sub       ecx, esi
	jecxz     .nb112_end
	;# non-zero, do one more workunit
	jmp       .nb112_threadloop

.nb112_end:
	femms				;# leave MMX/3DNow! state before returning

	;# store the nouter/ninner counters through the outeriter/inneriter pointer arguments
	mov       eax, [esp + nb112_nouter]
	mov       ebx, [esp + nb112_ninner]
	mov       ecx, [ebp + nb112_outeriter]
	mov       edx, [ebp + nb112_inneriter]
	mov       [ecx], eax
	mov       [edx], ebx

	;# NOTE(review): 220 bytes and this pop order presumably mirror the
	;# prologue, which is not visible here -- confirm against the function entry
	add       esp, 220
	pop       edi
	pop       esi
	pop       edx
	pop       ecx
	pop       ebx
	pop       eax
	leave
	ret


;# ---- nb_kernel112nf_ia32_3dnow: entry labels (plain and underscore-prefixed) ----
.globl nb_kernel112nf_ia32_3dnow
.globl _nb_kernel112nf_ia32_3dnow
nb_kernel112nf_ia32_3dnow:
_nb_kernel112nf_ia32_3dnow:
	;# argument offsets, 4 bytes apart starting at 8
	;# (presumably ebp-relative, as the nb112_* ones are used above -- confirm)
.equiv		nb112nf_p_nri,		8
.equiv		nb112nf_iinr,		12
.equiv		nb112nf_jindex,		16
.equiv		nb112nf_jjnr,		20
.equiv		nb112nf_shift,		24
.equiv		nb112nf_shiftvec,	28
.equiv		nb112nf_fshift,		32
.equiv		nb112nf_gid,		36
.equiv		nb112nf_pos,		40
.equiv		nb112nf_faction,	44
.equiv		nb112nf_charge,		48
.equiv		nb112nf_p_facel,	52
.equiv		nb112nf_p_krf,		56
.equiv		nb112nf_p_crf,		60
.equiv		nb112nf_Vc,		64
.equiv		nb112nf_type,		68
.equiv		nb112nf_p_ntype,	72
.equiv		nb112nf_vdwparam,	76
.equiv		nb112nf_Vvdw,		80
.equiv		nb112nf_p_tabscale,	84
.equiv		nb112nf_VFtab,		88
.equiv		nb112nf_invsqrta,	92
.equiv		nb112nf_dvda,		96
.equiv		nb112nf_p_gbtabscale,	100
.equiv		nb112nf_GBtab,		104
.equiv		nb112nf_p_nthreads,	108
.equiv		nb112nf_count,		112
.equiv		nb112nf_mtx,		116
.equiv		nb112nf_outeriter,	120
.equiv		nb112nf_inneriter,	124
.equiv		nb112nf_work,		128
	;# stack offsets for local variables
.equiv		nb112nf_is3,		0
.equiv		nb112nf_ii3,		4
.equiv		nb112nf_ixO,		8
.equiv		nb112nf_iyO,		12
.equiv		nb112nf_izO,		16
.equiv		nb112nf_ixH,		20
 ;# stack offsets for local variables (continued); entries from iyH to Vvdwtot are 8 bytes apart
.equiv		nb112nf_iyH,		28
.equiv		nb112nf_izH,		36
.equiv		nb112nf_qqOO,		44
.equiv		nb112nf_qqOH,		52
.equiv		nb112nf_qqHH,		60
.equiv		nb112nf_c6,		68
.equiv		nb112nf_c12,		76
.equiv		nb112nf_vctot,		84
.equiv		nb112nf_Vvdwtot,	92
.equiv		nb112nf_innerjjnr,	100
.equiv		nb112nf_innerk,		104
.equiv		nb112nf_n,		108	;# idx for outer loop
.equiv		nb112nf_nn1,		112	;# number of outer iterations

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -