nb_kernel132_ppc_altivec.c

来自「最著名最快的分子模拟软件」· C语言 代码 · 共 1,449 行 · 第 1/4 页

C
1,449
字号
/* -*- mode: c; tab-width: 4; indent-tabs-mode: n; c-basic-offset: 4 -*-  * * $Id: nb_kernel132_ppc_altivec.c,v 1.1 2004/12/26 19:26:00 lindahl Exp $ *  * This file is part of Gromacs        Copyright (c) 1991-2004 * David van der Spoel, Erik Lindahl, University of Groningen. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * To help us fund GROMACS development, we humbly ask that you cite * the research papers on the package. Check out http://www.gromacs.org *  * And Hey: * Gnomes, ROck Monsters And Chili Sauce */#ifdef HAVE_CONFIG_H#include <config.h>#endif/* Must come directly after config.h */#include <gmx_thread.h>#include "ppc_altivec_util.h"#include "nb_kernel132_ppc_altivec.h"void nb_kernel132_ppc_altivec  (int *             p_nri,                       int               iinr[],                       int               jindex[],                       int               jjnr[],                       int               shift[],                       float             shiftvec[],                       float             fshift[],                       int               gid[],                       float             pos[],                       float             faction[],                       float             charge[],                       float *           p_facel,                       float *           p_krf,                       float *           p_crf,                       float             Vc[],                       int               type[],                       int *             p_ntype,                       float             vdwparam[],                       float             Vvdw[],                       float *           p_tabscale,                       float             VFtab[],                       float             invsqrta[],                 
      float             dvda[],                       float *           p_gbtabscale,                       float             GBtab[],                       int *             p_nthreads,                       int *             count,                       void *            mtx,                       int *             outeriter,                       int *             inneriter,					   float *           work){	vector float ix1,iy1,iz1,ix2,iy2,iz2,ix3,iy3,iz3;	vector float jx1,jy1,jz1,jx2,jy2,jz2,jx3,jy3,jz3;	vector float dx11,dy11,dz11,dx12,dy12,dz12,dx13,dy13,dz13;	vector float dx21,dy21,dz21,dx22,dy22,dz22,dx23,dy23,dz23;	vector float dx31,dy31,dz31,dx32,dy32,dz32,dx33,dy33,dz33;	vector float rsq11,rsq12,rsq13,rsq21,rsq22,rsq23,rsq31,rsq32,rsq33;	vector float rinv11,rinv12,rinv13,rinv21,rinv22;	vector float rinv23,rinv31,rinv32,rinv33;	vector float rinvsq12,rinvsq13;	vector float rinvsq21,rinvsq22,rinvsq23;	vector float rinvsq31,rinvsq32,rinvsq33;	vector float vc11,vc12,vc13,vc21,vc22,vc23,vc31,vc32,vc33;	vector float Vvdwtot,c6,c12,c6t,c12t,VVd,VVr,FFd,FFr,tsc,r;		vector float vfacel,nul;	vector float fs11,fs12,fs13,fs21,fs22,fs23,fs31,fs32,fs33;	vector float fix1,fiy1,fiz1,fix2,fiy2,fiz2,fix3,fiy3,fiz3;	vector float fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,fjx3,fjy3,fjz3;	vector float vctot,qqOO,qqOH,qqHH,qO,qH,qqOOt,qqOHt,qqHHt; 	int n,k,ii,is3,ii3,nj0,nj1;	int jnra,jnrb,jnrc,jnrd;	int j3a,j3b,j3c,j3d;	int tp,tj;	int nri, ntype, nouter, ninner;#ifdef GMX_THREADS	int nn0, nn1;#endif    nouter   = 0;    ninner   = 0;    nri      = *p_nri;    ntype    = *p_ntype;	nul=vec_zero();	tsc=load_float_and_splat(p_tabscale);	vfacel=load_float_and_splat(p_facel);	ii        = iinr[0];	qO        = load_float_and_splat(charge+ii);	qH        = load_float_and_splat(charge+ii+1);	qqOO      = vec_madd(qO,qO,nul);	qqOH      = vec_madd(qO,qH,nul);	qqHH      = vec_madd(qH,qH,nul);	qqOO      = vec_madd(qqOO,vfacel,nul);	qqOH      = vec_madd(qqOH,vfacel,nul);	qqHH      = 
vec_madd(qqHH,vfacel,nul);	tp        = 2*type[ii];	tj        = (ntype+1)*tp;	load_1_pair(vdwparam+tj,&c6,&c12);	c6        = vec_splat(c6,0);	c12       = vec_splat(c12,0);#ifdef GMX_THREADS    nthreads = *p_nthreads;	do {		gmx_thread_mutex_lock((gmx_thread_mutex_t *)mtx);		nn0              = *count;		nn1              = nn0+(nri-nn0)/(2*nthreads)+3;		*count           = nn1;		gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx);		if(nn1>nri) nn1=nri;		for(n=nn0; (n<nn1); n++) {      #if 0		} /* maintain correct indentation even with conditional left braces */#endif#else /* without gmx_threads */		for(n=0;n<nri;n++) {#endif  			is3        = 3*shift[n];			ii         = iinr[n];			ii3        = 3*ii;			load_1_3atoms_shift_and_splat(pos+ii3,shiftvec+is3,&ix1,&iy1,&iz1,										  &ix2,&iy2,&iz2,&ix3,&iy3,&iz3);			vctot      = nul;			Vvdwtot    = nul;			fix1       = nul;			fiy1       = nul;			fiz1       = nul;			fix2       = nul;			fiy2       = nul;			fiz2       = nul;			fix3       = nul;			fiy3       = nul;			fiz3       = nul;			nj0        = jindex[n];			nj1        = jindex[n+1];			for(k=nj0; k<(nj1-3); k+=4) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				jnrd            = jjnr[k+3];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				j3d             = 3*jnrd;				load_4_3atoms(pos+j3a,pos+j3b,pos+j3c,pos+j3d,							  &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);				dx11            = vec_sub(ix1,jx1);				dx12            = vec_sub(ix1,jx2);				dx13            = vec_sub(ix1,jx3);				dy11            = vec_sub(iy1,jy1);				dy12            = vec_sub(iy1,jy2);				dy13            = vec_sub(iy1,jy3);				dz11            = vec_sub(iz1,jz1);				dz12            = vec_sub(iz1,jz2);				dz13            = vec_sub(iz1,jz3);				dx21            = vec_sub(ix2,jx1);				dx22            = vec_sub(ix2,jx2);				dx23            = vec_sub(ix2,jx3);				dy21            = vec_sub(iy2,jy1);				dy22            = 
vec_sub(iy2,jy2);				dy23            = vec_sub(iy2,jy3);				dz21            = vec_sub(iz2,jz1);				dz22            = vec_sub(iz2,jz2);				dz23            = vec_sub(iz2,jz3);				dx31            = vec_sub(ix3,jx1);				dx32            = vec_sub(ix3,jx2);				dx33            = vec_sub(ix3,jx3);				dy31            = vec_sub(iy3,jy1);				dy32            = vec_sub(iy3,jy2);				dy33            = vec_sub(iy3,jy3);				dz31            = vec_sub(iz3,jz1);				dz32            = vec_sub(iz3,jz2);				dz33            = vec_sub(iz3,jz3);				rsq11           = vec_madd(dx11,dx11,nul);				rsq12           = vec_madd(dx12,dx12,nul);				rsq13           = vec_madd(dx13,dx13,nul);				rsq21           = vec_madd(dx21,dx21,nul);				rsq22           = vec_madd(dx22,dx22,nul);				rsq23           = vec_madd(dx23,dx23,nul);				rsq31           = vec_madd(dx31,dx31,nul);				rsq32           = vec_madd(dx32,dx32,nul);				rsq33           = vec_madd(dx33,dx33,nul);				rsq11           = vec_madd(dy11,dy11,rsq11);				rsq12           = vec_madd(dy12,dy12,rsq12);				rsq13           = vec_madd(dy13,dy13,rsq13);				rsq21           = vec_madd(dy21,dy21,rsq21);				rsq22           = vec_madd(dy22,dy22,rsq22);				rsq23           = vec_madd(dy23,dy23,rsq23);				rsq31           = vec_madd(dy31,dy31,rsq31);				rsq32           = vec_madd(dy32,dy32,rsq32);				rsq33           = vec_madd(dy33,dy33,rsq33);				rsq11           = vec_madd(dz11,dz11,rsq11);				rsq12           = vec_madd(dz12,dz12,rsq12);				rsq13           = vec_madd(dz13,dz13,rsq13);				rsq21           = vec_madd(dz21,dz21,rsq21);				rsq22           = vec_madd(dz22,dz22,rsq22);				rsq23           = vec_madd(dz23,dz23,rsq23);				rsq31           = vec_madd(dz31,dz31,rsq31);				rsq32           = vec_madd(dz32,dz32,rsq32);				rsq33           = vec_madd(dz33,dz33,rsq33);				do_9_invsqrt(rsq11,rsq12,rsq13,							 rsq21,rsq22,rsq23,							 rsq31,rsq32,rsq33,							 &rinv11,&rinv12,&rinv13,							 &rinv21,&rinv22,&rinv23,							 &rinv31,&rinv32,&rinv33);				r         
      = vec_madd(rinv11,rsq11,nul);				rinvsq12        = vec_madd(rinv12,rinv12,nul);				rinvsq13        = vec_madd(rinv13,rinv13,nul);				rinvsq21        = vec_madd(rinv21,rinv21,nul);				rinvsq22        = vec_madd(rinv22,rinv22,nul);				rinvsq23        = vec_madd(rinv23,rinv23,nul);				rinvsq31        = vec_madd(rinv31,rinv31,nul);				rinvsq32        = vec_madd(rinv32,rinv32,nul);				rinvsq33        = vec_madd(rinv33,rinv33,nul);				vc11            = vec_madd(rinv11,qqOO,nul);				vc12            = vec_madd(rinv12,qqOH,nul);				vc13            = vec_madd(rinv13,qqOH,nul);				vc21            = vec_madd(rinv21,qqOH,nul);				vc22            = vec_madd(rinv22,qqHH,nul);				vc23            = vec_madd(rinv23,qqHH,nul);				vc31            = vec_madd(rinv31,qqOH,nul);				vc32            = vec_madd(rinv32,qqHH,nul);				vc33            = vec_madd(rinv33,qqHH,nul);				do_4_ljtable_lj(VFtab,vec_madd(r,tsc,nul),&VVd,&FFd,&VVr,&FFr);				Vvdwtot         = vec_madd(c6,VVd,Vvdwtot);				fs11            = vec_nmsub(c6,FFd,nul);				Vvdwtot         = vec_madd(c12,VVr,Vvdwtot);				fs11            = vec_nmsub(c12,FFr,fs11);				fs11            = vec_madd(fs11,tsc,nul);				fs11            = vec_madd(vc11,rinv11,fs11);				fs11            = vec_madd(fs11,rinv11,nul);								fs12            = vec_madd(vc12,rinvsq12,nul);				fs13            = vec_madd(vc13,rinvsq13,nul);				fs21            = vec_madd(vc21,rinvsq21,nul);				fs22            = vec_madd(vc22,rinvsq22,nul);				fs23            = vec_madd(vc23,rinvsq23,nul);				fs31            = vec_madd(vc31,rinvsq31,nul);				fs32            = vec_madd(vc32,rinvsq32,nul);				fs33            = vec_madd(vc33,rinvsq33,nul);				vctot           = vec_add(vctot,vc11);				vc12            = vec_add(vc12,vc13);				vc21            = vec_add(vc21,vc22);				vc23            = vec_add(vc23,vc31);				vc32            = vec_add(vc32,vc33);				vctot           = vec_add(vctot,vc12);				vc21            = vec_add(vc21,vc23);				vctot           = vec_add(vctot,vc32);				
vctot           = vec_add(vctot,vc21); 				fix1            = vec_madd(fs11,dx11,fix1);				fiy1            = vec_madd(fs11,dy11,fiy1);				fiz1            = vec_madd(fs11,dz11,fiz1);				fix2            = vec_madd(fs21,dx21,fix2);				fiy2            = vec_madd(fs21,dy21,fiy2);				fiz2            = vec_madd(fs21,dz21,fiz2);				fix3            = vec_madd(fs31,dx31,fix3);				fiy3            = vec_madd(fs31,dy31,fiy3);				fiz3            = vec_madd(fs31,dz31,fiz3);				fix1            = vec_madd(fs12,dx12,fix1);				fiy1            = vec_madd(fs12,dy12,fiy1);				fiz1            = vec_madd(fs12,dz12,fiz1);				fix2            = vec_madd(fs22,dx22,fix2);				fiy2            = vec_madd(fs22,dy22,fiy2);				fiz2            = vec_madd(fs22,dz22,fiz2);				fix3            = vec_madd(fs32,dx32,fix3);				fiy3            = vec_madd(fs32,dy32,fiy3);				fiz3            = vec_madd(fs32,dz32,fiz3);				fix1            = vec_madd(fs13,dx13,fix1);				fiy1            = vec_madd(fs13,dy13,fiy1);				fiz1            = vec_madd(fs13,dz13,fiz1);				fix2            = vec_madd(fs23,dx23,fix2);				fiy2            = vec_madd(fs23,dy23,fiy2);				fiz2            = vec_madd(fs23,dz23,fiz2);				fix3            = vec_madd(fs33,dx33,fix3);				fiy3            = vec_madd(fs33,dy33,fiy3);				fiz3            = vec_madd(fs33,dz33,fiz3);				fjx1            = vec_nmsub(fs11,dx11,nul);				fjy1            = vec_nmsub(fs11,dy11,nul);				fjz1            = vec_nmsub(fs11,dz11,nul);				fjx2            = vec_nmsub(fs12,dx12,nul);				fjy2            = vec_nmsub(fs12,dy12,nul);				fjz2            = vec_nmsub(fs12,dz12,nul);				fjx3            = vec_nmsub(fs13,dx13,nul);				fjy3            = vec_nmsub(fs13,dy13,nul);				fjz3            = vec_nmsub(fs13,dz13,nul);				fjx1            = vec_nmsub(fs21,dx21,fjx1);				fjy1            = vec_nmsub(fs21,dy21,fjy1);				fjz1            = vec_nmsub(fs21,dz21,fjz1);				fjx2            = vec_nmsub(fs22,dx22,fjx2);				fjy2            = vec_nmsub(fs22,dy22,fjy2);				fjz2            = 
vec_nmsub(fs22,dz22,fjz2);				fjx3            = vec_nmsub(fs23,dx23,fjx3);				fjy3            = vec_nmsub(fs23,dy23,fjy3);				fjz3            = vec_nmsub(fs23,dz23,fjz3);				fjx1            = vec_nmsub(fs31,dx31,fjx1);				fjy1            = vec_nmsub(fs31,dy31,fjy1);				fjz1            = vec_nmsub(fs31,dz31,fjz1);				fjx2            = vec_nmsub(fs32,dx32,fjx2);				fjy2            = vec_nmsub(fs32,dy32,fjy2);				fjz2            = vec_nmsub(fs32,dz32,fjz2);				fjx3            = vec_nmsub(fs33,dx33,fjx3);				fjy3            = vec_nmsub(fs33,dy33,fjy3);				fjz3            = vec_nmsub(fs33,dz33,fjz3);								add_force_to_4_3atoms(faction+j3a,faction+j3b,									  faction+j3c,faction+j3d,									  fjx1,fjy1,fjz1,fjx2,fjy2,fjz2,									  fjx3,fjy3,fjz3);			} 			if(k<(nj1-2)) {				jnra            = jjnr[k];				jnrb            = jjnr[k+1];				jnrc            = jjnr[k+2];				j3a             = 3*jnra;				j3b             = 3*jnrb;				j3c             = 3*jnrc;				load_3_3atoms(pos+j3a,pos+j3b,pos+j3c,							  &jx1,&jy1,&jz1,&jx2,&jy2,&jz2,&jx3,&jy3,&jz3);				qqOOt           = vec_sld(qqOO,nul,4);				qqOHt           = vec_sld(qqOH,nul,4);				qqHHt           = vec_sld(qqHH,nul,4);				c6t             = vec_sld(c6,nul,4);				c12t            = vec_sld(c12,nul,4);				dx11            = vec_sub(ix1,jx1);				dx12            = vec_sub(ix1,jx2);				dx13            = vec_sub(ix1,jx3);				dy11            = vec_sub(iy1,jy1);				dy12            = vec_sub(iy1,jy2);				dy13            = vec_sub(iy1,jy3);

⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?