nb_kernel211_ppc_altivec.c

来自「最著名最快的分子模拟软件」· C语言 代码 · 共 869 行 · 第 1/3 页

C
869
字号
	vector float rinvO,rinvH1,rinvH2,rinvsqO,rsqO,rsqH1,rsqH2;  	int n,k,ii,is3,ii3,ntiA,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int nri, ntype, nouter, ninner;	int tja,tjb,tjc,tjd;#ifdef GMX_THREADS	int nn0, nn1;#endif    nouter   = 0;    ninner   = 0;    nri      = *p_nri;    ntype    = *p_ntype;	nul=vec_zero();	vfacel=load_float_and_splat(p_facel);	vkrf=load_float_and_splat(p_krf);	vcrf=load_float_and_splat(p_crf);	ii         = iinr[0];	iqO        = vec_madd(load_float_and_splat(charge+ii),vfacel,nul);	iqH        = vec_madd(load_float_and_splat(charge+ii+1),vfacel,nul);	ntiA       = 2*ntype*type[ii];  #ifdef GMX_THREADS    nthreads = *p_nthreads;	do {		gmx_thread_mutex_lock((gmx_thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {#if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without gmx_threads */		for(n=0;n<nri;n++) {#endif  			is3        = 3*shift[n];			ii         = iinr[n];			ii3        = 3*ii;			load_1_3atoms_shift_and_splat(pos+ii3,shiftvec+is3,&iOx,&iOy,&iOz,										  &iH1x,&iH1y,&iH1z,&iH2x,&iH2y,&iH2z);			vctot      = nul;			Vvdwtot     = nul;			nj0        = jindex[n];			nj1        = jindex[n+1];    			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),								 load_xyz(pos+j3d),&dH2x,&dH2y,&dH2z);				dOx             = vec_sub(iOx,dH2x);				dOy             = vec_sub(iOy,dH2y);				dOz             = vec_sub(iOz,dH2z);				dH1x            = vec_sub(iH1x,dH2x);				dH1y            = vec_sub(iH1y,dH2y);				dH1z            = vec_sub(iH1z,dH2z);				dH2x            = vec_sub(iH2x,dH2x);				dH2y            = vec_sub(iH2y,dH2y);				dH2z            = vec_sub(iH2z,dH2z);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2);				rinvsqO         = vec_madd(rinvO,rinvO,nul);				rinvsix         = vec_madd(rinvsqO,rinvsqO,nul);				rinvsix         = vec_madd(rinvsix,rinvsqO,nul);				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];				tjd             = ntiA+2*type[jnrd];				/* load 4 j charges and multiply by iq */				jq=load_4_float(charge+jnra,charge+jnrb,								charge+jnrc,charge+jnrd);				load_4_pair(vdwparam+tja,vdwparam+tjb,vdwparam+tjc,vdwparam+tjd,&c6,&c12);				qqO             = vec_madd(iqO,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqO           = vec_madd(vkrf,rsqO,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				Vvdwtot          = vec_nmsub(c6,rinvsix,Vvdwtot);				Vvdwtot          = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul),										   Vvdwtot);				vcoulO          = vec_add(rinvO,krsqO);				vcoulH1         = vec_add(rinvH1,krsqH1);				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulO          = vec_sub(vcoulO,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqO,vcoulO,vctot);				vctot           = vec_madd(qqH,vcoulH1,vctot);				vctot           = vec_madd(qqH,vcoulH2,vctot);			} 			if(k<(nj1-2)) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				transpose_4_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),								 load_xyz(pos+j3c),nul,&dH2x,&dH2y,&dH2z);				dOx             = vec_sub(iOx,dH2x);				dOy             = vec_sub(iOy,dH2y);				dOz             = vec_sub(iOz,dH2z);				dH1x            = vec_sub(iH1x,dH2x);				dH1y            = vec_sub(iH1y,dH2y);				dH1z            = vec_sub(iH1z,dH2z);				dH2x            = vec_sub(iH2x,dH2x);				dH2y            = vec_sub(iH2y,dH2y);				dH2z            = vec_sub(iH2z,dH2z);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				zero_highest_element_in_3_vectors(&rsqO,&rsqH1,&rsqH2);				do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2);				zero_highest_element_in_3_vectors(&rinvO,&rinvH1,&rinvH2);				rinvsqO         = vec_madd(rinvO,rinvO,nul);				rinvsix         = vec_madd(rinvsqO,rinvsqO,nul);				rinvsix         = vec_madd(rinvsix,rinvsqO,nul);				jq=load_3_float(charge+jnra,charge+jnrb,charge+jnrc);      				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];				load_3_pair(vdwparam+tja,vdwparam+tjb,vdwparam+tjc,&c6,&c12);				qqO             = vec_madd(iqO,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqO           = vec_madd(vkrf,rsqO,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				Vvdwtot          = vec_nmsub(c6,rinvsix,Vvdwtot);				Vvdwtot          = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul),										   Vvdwtot);				vcoulO          = vec_add(rinvO,krsqO);				vcoulH1         = vec_add(rinvH1,krsqH1);				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulO          = vec_sub(vcoulO,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqO,vcoulO,vctot);				vctot           = vec_madd(qqH,vcoulH1,vctot);				vctot           = vec_madd(qqH,vcoulH2,vctot);			} else if(k<(nj1-1)) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				j3a             = 3*jnra;				j3b             = 3*jnrb;				transpose_2_to_3(load_xyz(pos+j3a),								 load_xyz(pos+j3b),&dH2x,&dH2y,&dH2z);				dOx             = vec_sub(iOx,dH2x);				dOy             = vec_sub(iOy,dH2y);				dOz             = vec_sub(iOz,dH2z);				dH1x            = vec_sub(iH1x,dH2x);				dH1y            = vec_sub(iH1y,dH2y);				dH1z            = vec_sub(iH1z,dH2z);				dH2x            = vec_sub(iH2x,dH2x);				dH2y            = vec_sub(iH2y,dH2y);				dH2z            = vec_sub(iH2z,dH2z);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				zero_highest_2_elements_in_3_vectors(&rsqO,&rsqH1,&rsqH2);				do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2);				zero_highest_2_elements_in_3_vectors(&rinvO,&rinvH1,&rinvH2);				rinvsqO         = vec_madd(rinvO,rinvO,nul);				rinvsix         = vec_madd(rinvsqO,rinvsqO,nul);				rinvsix         = vec_madd(rinvsix,rinvsqO,nul);				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				/* load 2 j charges and multiply by iq */				jq=load_2_float(charge+jnra,charge+jnrb);				load_2_pair(vdwparam+tja,vdwparam+tjb,&c6,&c12);				qqO             = vec_madd(iqO,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqO           = vec_madd(vkrf,rsqO,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				Vvdwtot          = vec_nmsub(c6,rinvsix,Vvdwtot);				Vvdwtot          = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul),										   Vvdwtot);				vcoulO          = vec_add(rinvO,krsqO);				vcoulH1         = vec_add(rinvH1,krsqH1);				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulO          = vec_sub(vcoulO,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqO,vcoulO,vctot);				vctot           = vec_madd(qqH,vcoulH1,vctot);				vctot           = vec_madd(qqH,vcoulH2,vctot);			} else if(k<nj1) {				jnra            = jjnr[k];				j3a             = 3*jnra;				transpose_1_to_3(load_xyz(pos+j3a),&dH2x,&dH2y,&dH2z);				dOx             = vec_sub(iOx,dH2x);				dOy             = vec_sub(iOy,dH2y);				dOz             = vec_sub(iOz,dH2z);				dH1x            = vec_sub(iH1x,dH2x);				dH1y            = vec_sub(iH1y,dH2y);				dH1z            = vec_sub(iH1z,dH2z);				dH2x            = vec_sub(iH2x,dH2x);				dH2y            = vec_sub(iH2y,dH2y);				dH2z            = vec_sub(iH2z,dH2z);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				zero_highest_3_elements_in_3_vectors(&rsqO,&rsqH1,&rsqH2);				do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2);				zero_highest_3_elements_in_3_vectors(&rinvO,&rinvH1,&rinvH2);				rinvsqO         = vec_madd(rinvO,rinvO,nul);				rinvsix         = vec_madd(rinvsqO,rinvsqO,nul);				rinvsix         = vec_madd(rinvsix,rinvsqO,nul);				tja             = ntiA+2*type[jnra];				/* load 1 j charges and multiply by iq */				jq=load_1_float(charge+jnra);				load_1_pair(vdwparam+tja,&c6,&c12);				qqO             = vec_madd(iqO,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqO           = vec_madd(vkrf,rsqO,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				Vvdwtot          = vec_nmsub(c6,rinvsix,Vvdwtot);				Vvdwtot          = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul),										   Vvdwtot);				vcoulO          = vec_add(rinvO,krsqO);				vcoulH1         = vec_add(rinvH1,krsqH1);				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulO          = vec_sub(vcoulO,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqO,vcoulO,vctot);				vctot           = vec_madd(qqH,vcoulH1,vctot);				vctot           = vec_madd(qqH,vcoulH2,vctot);			}			/* update outer data */			add_vector_to_float(Vc+gid[n],vctot);			add_vector_to_float(Vvdw+gid[n],Vvdwtot);			ninner += nj1 - nj0;		}#ifdef GMX_THREADS		nouter += nn1 - nn0;	} while (nn1<nri);#else	nouter = nri;#endif	*outeriter = nouter;	*inneriter = ninner;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?