nb_kernel233_ppc_altivec.c

来自「最著名最快的分子模拟软件」· C语言 代码 · 共 1,005 行 · 第 1/3 页

C
1,005
字号
				fsH2            = vec_madd(fsH2,qqH,nul);				vctot           = vec_madd(qqH,vcoulH2,vctot);				fsH1            = vec_madd(fsH1,rinvsqH1,nul);				fsH2            = vec_madd(fsH2,rinvsqH2,nul);				fiOx            = vec_madd(fsO,dOx,fiOx); /* +=fx */				dOx             = vec_nmsub(fsO,dOx,nul); /* -fx */				fiOy            = vec_madd(fsO,dOy,fiOy); /* +=fy */				dOy             = vec_nmsub(fsO,dOy,nul); /* -fy */				fiOz            = vec_madd(fsO,dOz,fiOz); /* +=fz */				dOz             = vec_nmsub(fsO,dOz,nul); /* -fz */				fiH1x           = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */				dOx             = vec_nmsub(fsH1,dH1x,dOx); /* -fx */				fiH1y           = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */				dOy             = vec_nmsub(fsH1,dH1y,dOy); /* -fy */				fiH1z           = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */				dOz             = vec_nmsub(fsH1,dH1z,dOz); /* -fz */				fiH2x           = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */				dOx             = vec_nmsub(fsH2,dH2x,dOx); /* -fx */				fiH2y           = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */				dOy             = vec_nmsub(fsH2,dH2y,dOy); /* -fy */				fiH2z           = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */				dOz             = vec_nmsub(fsH2,dH2z,dOz); /* -fz */				fiMx            = vec_madd(fsM,dMx,fiMx); /* +=fx */				dOx             = vec_nmsub(fsM,dMx,dOx); /* -fx */				fiMy            = vec_madd(fsM,dMy,fiMy); /* +=fy */				dOy             = vec_nmsub(fsM,dMy,dOy); /* -fy */				fiMz            = vec_madd(fsM,dMz,fiMz); /* +=fz */				dOz             = vec_nmsub(fsM,dMz,dOz); /* -fz */      				transpose_4_to_3(dOx,dOy,dOz,nul,&tmp1,&tmp2,&tmp3);				add_xyz_to_mem(faction+j3a,tmp1);				add_xyz_to_mem(faction+j3b,tmp2);				add_xyz_to_mem(faction+j3c,tmp3);			} else if(k<(nj1-1)) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				transpose_2_to_3(load_xyz(pos+j3a),								 
load_xyz(pos+j3b),&dMx,&dMy,&dMz);				dOx             = vec_sub(iOx,dMx);				dOy             = vec_sub(iOy,dMy);				dOz             = vec_sub(iOz,dMz);				dH1x            = vec_sub(iH1x,dMx);				dH1y            = vec_sub(iH1y,dMy);				dH1z            = vec_sub(iH1z,dMz);				dH2x            = vec_sub(iH2x,dMx);				dH2y            = vec_sub(iH2y,dMy);				dH2z            = vec_sub(iH2z,dMz);				dMx             = vec_sub(iMx,dMx);				dMy             = vec_sub(iMy,dMy);				dMz             = vec_sub(iMz,dMz);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqM            = vec_madd(dMx,dMx,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqM            = vec_madd(dMy,dMy,rsqM);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				rsqM            = vec_madd(dMz,dMz,rsqM);				rinvO           = do_invsqrt(rsqO);				do_3_invsqrt(rsqM,rsqH1,rsqH2,&rinvM,&rinvH1,&rinvH2);      				zero_highest_element_in_vector(&rsqO);				zero_highest_element_in_vector(&rinvO);				zero_highest_element_in_3_vectors(&rsqM,&rsqH1,&rsqH2);				zero_highest_element_in_3_vectors(&rinvM,&rinvH1,&rinvH2);				r               = vec_madd(rinvO,rsqO,nul);				rinvsqM         = vec_madd(rinvM,rinvM,nul);				rinvsqH1        = vec_madd(rinvH1,rinvH1,nul);				rinvsqH2        = vec_madd(rinvH2,rinvH2,nul);				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];				tjd             = ntiA+2*type[jnrd];				/* load 2 j charges and multiply by iq */				jq=load_2_float(charge+jnra,charge+jnrb);				load_2_pair(vdwparam+tja,vdwparam+tjb,&c6,&c12);				do_2_ljtable_lj(VFtab,vec_madd(r,tsc,nul),&VVd,&FFd,&VVr,&FFr);				Vvdwtot         = vec_madd(c6,VVd,Vvdwtot);				fsO       
      = vec_nmsub(c6,FFd,nul);				Vvdwtot         = vec_madd(c12,VVr,Vvdwtot);				fsO             = vec_nmsub(c12,FFr,fsO);				fsO             = vec_madd(fsO,tsc,nul);				fsO             = vec_madd(fsO,rinvO,nul);				qqM             = vec_madd(iqM,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqM           = vec_madd(vkrf,rsqM,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				fsM             = vec_nmsub(vec_two(),krsqM,rinvM);				vcoulM          = vec_add(rinvM,krsqM);				vcoulH1         = vec_add(rinvH1,krsqH1);				fsM             = vec_madd(qqM,fsM,nul);          				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulM          = vec_sub(vcoulM,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqM,vcoulM,vctot);				fsH1            = vec_nmsub(vec_two(),krsqH1,rinvH1);				fsH2            = vec_nmsub(vec_two(),krsqH2,rinvH2);				vctot           = vec_madd(qqH,vcoulH1,vctot);				fsM             = vec_madd(fsM,rinvsqM,nul);				fsH1            = vec_madd(fsH1,qqH,nul);				fsH2            = vec_madd(fsH2,qqH,nul);				vctot           = vec_madd(qqH,vcoulH2,vctot);				fsH1            = vec_madd(fsH1,rinvsqH1,nul);				fsH2            = vec_madd(fsH2,rinvsqH2,nul);				fiOx            = vec_madd(fsO,dOx,fiOx); /* +=fx */				dOx             = vec_nmsub(fsO,dOx,nul); /* -fx */				fiOy            = vec_madd(fsO,dOy,fiOy); /* +=fy */				dOy             = vec_nmsub(fsO,dOy,nul); /* -fy */				fiOz            = vec_madd(fsO,dOz,fiOz); /* +=fz */				dOz             = vec_nmsub(fsO,dOz,nul); /* -fz */				fiH1x           = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */				dOx             = vec_nmsub(fsH1,dH1x,dOx); /* -fx */				fiH1y           = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */				dOy             = vec_nmsub(fsH1,dH1y,dOy); /* -fy */				fiH1z           = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */				dOz             = vec_nmsub(fsH1,dH1z,dOz); /* -fz */				
fiH2x           = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */				dOx             = vec_nmsub(fsH2,dH2x,dOx); /* -fx */				fiH2y           = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */				dOy             = vec_nmsub(fsH2,dH2y,dOy); /* -fy */				fiH2z           = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */				dOz             = vec_nmsub(fsH2,dH2z,dOz); /* -fz */				fiMx            = vec_madd(fsM,dMx,fiMx); /* +=fx */				dOx             = vec_nmsub(fsM,dMx,dOx); /* -fx */				fiMy            = vec_madd(fsM,dMy,fiMy); /* +=fy */				dOy             = vec_nmsub(fsM,dMy,dOy); /* -fy */				fiMz            = vec_madd(fsM,dMz,fiMz); /* +=fz */				dOz             = vec_nmsub(fsM,dMz,dOz); /* -fz */      				transpose_4_to_2(dOx,dOy,dOz,nul,&tmp1,&tmp2);				add_xyz_to_mem(faction+j3a,tmp1);				add_xyz_to_mem(faction+j3b,tmp2);			} else if(k<nj1) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				transpose_1_to_3(load_xyz(pos+j3a),&dMx,&dMy,&dMz);				dOx             = vec_sub(iOx,dMx);				dOy             = vec_sub(iOy,dMy);				dOz             = vec_sub(iOz,dMz);				dH1x            = vec_sub(iH1x,dMx);				dH1y            = vec_sub(iH1y,dMy);				dH1z            = vec_sub(iH1z,dMz);				dH2x            = vec_sub(iH2x,dMx);				dH2y            = vec_sub(iH2y,dMy);				dH2z            = vec_sub(iH2z,dMz);				dMx             = vec_sub(iMx,dMx);				dMy             = vec_sub(iMy,dMy);				dMz             = vec_sub(iMz,dMz);      				rsqO            = vec_madd(dOx,dOx,nul);				rsqH1           = vec_madd(dH1x,dH1x,nul);				rsqH2           = vec_madd(dH2x,dH2x,nul);				rsqM            = vec_madd(dMx,dMx,nul);				rsqO            = vec_madd(dOy,dOy,rsqO);				rsqH1           = vec_madd(dH1y,dH1y,rsqH1);				rsqH2           = vec_madd(dH2y,dH2y,rsqH2);				rsqM            = vec_madd(dMy,dMy,rsqM);				rsqO            = vec_madd(dOz,dOz,rsqO);				rsqH1           = 
vec_madd(dH1z,dH1z,rsqH1);				rsqH2           = vec_madd(dH2z,dH2z,rsqH2);				rsqM            = vec_madd(dMz,dMz,rsqM);				rinvO           = do_invsqrt(rsqO);				do_3_invsqrt(rsqM,rsqH1,rsqH2,&rinvM,&rinvH1,&rinvH2);      				zero_highest_element_in_vector(&rsqO);				zero_highest_element_in_vector(&rinvO);				zero_highest_element_in_3_vectors(&rsqM,&rsqH1,&rsqH2);				zero_highest_element_in_3_vectors(&rinvM,&rinvH1,&rinvH2);				r               = vec_madd(rinvO,rsqO,nul);				rinvsqM         = vec_madd(rinvM,rinvM,nul);				rinvsqH1        = vec_madd(rinvH1,rinvH1,nul);				rinvsqH2        = vec_madd(rinvH2,rinvH2,nul);				tja             = ntiA+2*type[jnra];				tjb             = ntiA+2*type[jnrb];				tjc             = ntiA+2*type[jnrc];				tjd             = ntiA+2*type[jnrd];				/* load 1 j charge and multiply by iq */				jq=load_1_float(charge+jnra);				load_1_pair(vdwparam+tja,&c6,&c12);				do_1_ljtable_lj(VFtab,vec_madd(r,tsc,nul),&VVd,&FFd,&VVr,&FFr);				Vvdwtot         = vec_madd(c6,VVd,Vvdwtot);				fsO             = vec_nmsub(c6,FFd,nul);				Vvdwtot         = vec_madd(c12,VVr,Vvdwtot);				fsO             = vec_nmsub(c12,FFr,fsO);				fsO             = vec_madd(fsO,tsc,nul);				fsO             = vec_madd(fsO,rinvO,nul);								qqM             = vec_madd(iqM,jq,nul);				qqH             = vec_madd(iqH,jq,nul);				krsqM           = vec_madd(vkrf,rsqM,nul);				krsqH1          = vec_madd(vkrf,rsqH1,nul);				krsqH2          = vec_madd(vkrf,rsqH2,nul);				fsM             = vec_nmsub(vec_two(),krsqM,rinvM);				vcoulM          = vec_add(rinvM,krsqM);				vcoulH1         = vec_add(rinvH1,krsqH1);				fsM             = vec_madd(qqM,fsM,nul);          				vcoulH2         = vec_add(rinvH2,krsqH2);				vcoulM          = vec_sub(vcoulM,vcrf);				vcoulH1         = vec_sub(vcoulH1,vcrf);				vcoulH2         = vec_sub(vcoulH2,vcrf);				vctot           = vec_madd(qqM,vcoulM,vctot);				fsH1            = vec_nmsub(vec_two(),krsqH1,rinvH1);				fsH2            = 
vec_nmsub(vec_two(),krsqH2,rinvH2);				vctot           = vec_madd(qqH,vcoulH1,vctot);				fsM             = vec_madd(fsM,rinvsqM,nul);				fsH1            = vec_madd(fsH1,qqH,nul);				fsH2            = vec_madd(fsH2,qqH,nul);				vctot           = vec_madd(qqH,vcoulH2,vctot);				fsH1            = vec_madd(fsH1,rinvsqH1,nul);				fsH2            = vec_madd(fsH2,rinvsqH2,nul);				fiOx            = vec_madd(fsO,dOx,fiOx); /* +=fx */				dOx             = vec_nmsub(fsO,dOx,nul); /* -fx */				fiOy            = vec_madd(fsO,dOy,fiOy); /* +=fy */				dOy             = vec_nmsub(fsO,dOy,nul); /* -fy */				fiOz            = vec_madd(fsO,dOz,fiOz); /* +=fz */				dOz             = vec_nmsub(fsO,dOz,nul); /* -fz */				fiH1x           = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */				dOx             = vec_nmsub(fsH1,dH1x,dOx); /* -fx */				fiH1y           = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */				dOy             = vec_nmsub(fsH1,dH1y,dOy); /* -fy */				fiH1z           = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */				dOz             = vec_nmsub(fsH1,dH1z,dOz); /* -fz */				fiH2x           = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */				dOx             = vec_nmsub(fsH2,dH2x,dOx); /* -fx */				fiH2y           = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */				dOy             = vec_nmsub(fsH2,dH2y,dOy); /* -fy */				fiH2z           = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */				dOz             = vec_nmsub(fsH2,dH2z,dOz); /* -fz */				fiMx            = vec_madd(fsM,dMx,fiMx); /* +=fx */				dOx             = vec_nmsub(fsM,dMx,dOx); /* -fx */				fiMy            = vec_madd(fsM,dMy,fiMy); /* +=fy */				dOy             = vec_nmsub(fsM,dMy,dOy); /* -fy */				fiMz            = vec_madd(fsM,dMz,fiMz); /* +=fz */				dOz             = vec_nmsub(fsM,dMz,dOz); /* -fz */      				transpose_3_to_1(dOx,dOy,dOz,&tmp1);				add_xyz_to_mem(faction+j3a,tmp1);			}			/* update outer data */			update_i_4atoms_forces(faction+ii3,fshift+is3,								   fiOx,fiOy,fiOz,fiH1x,fiH1y,fiH1z,								   fiH2x,fiH2y,fiH2z,								   fiMx,fiMy,fiMz);			
add_vector_to_float(Vc+gid[n],vctot);			add_vector_to_float(Vvdw+gid[n],Vvdwtot);			ninner += nj1 - nj0;		}#ifdef GMX_THREADS		nouter += nn1 - nn0;	} while (nn1<nri);#else	nouter = nri;#endif	*outeriter = nouter;	*inneriter = ninner;}void nb_kernel233nf_ppc_altivec(int *             p_nri,                       int               iinr[],                       int               jindex[],                       int               jjnr[],                       int               shift[],                       float             shiftvec[],                       float             fshift[],                       int               gid[],                       float             pos[],                       float             faction[],                       float             charge[],                       float *           p_facel,                       float *           p_krf,                       float *           p_crf,                       float             Vc[],                       int               type[],                       int *             p_ntype,                       float             vdwparam[],                       float             Vvdw[],                       float *           p_tabscale,                       float             VFtab[],                       float             invsqrta[],                       float             dvda[],                       float *           p_gbtabscale,                       float             GBtab[],                       int *             p_nthreads,                       int *             count,                       void *            mtx,                       int *             outeriter,                       int *             inneriter,					   float *           work){	vector float vkrf,vcrf;	vector float iOx,iOy,iOz,iH1x,iH1y,iH1z,iH2x,iH2y,iH2z,iMx,iMy,iMz;	vector float dOx,dOy,dOz,dH1x,dH1y,dH1z,dH2x,dH2y,dH2z,dMx,dMy,dMz;	vector float vfacel,vcoulM,vcoulH1,vcoulH2,nul;	vector float 
Vvdwtot,c6,c12,VVd,VVr,tsc,r;	vector float vctot,qqM,qqH,iqM,iqH,jq,krsqM,krsqH1,krsqH2;	vector float rinvO,rinvM,rinvH1,rinvH2,rinvsqH1,rinvsqH2,rinvsqM;	vector float rsqO,rsqH1,rsqH2,rsqM;  	int n,k,ii,is3,ii3,ntiA,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int nri, ntype, nouter, ninner;	int tja,tjb,tjc,tjd;#ifdef GMX_THREADS	int nn0, nn1;#endif    nouter   = 0;    ninner   = 0;

⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?