nb_kernel233_ppc_altivec.c
来自「最著名最快的分子模拟软件」· C语言 代码 · 共 1,005 行 · 第 1/3 页
C
1,005 行
fsH2 = vec_madd(fsH2,qqH,nul); vctot = vec_madd(qqH,vcoulH2,vctot); fsH1 = vec_madd(fsH1,rinvsqH1,nul); fsH2 = vec_madd(fsH2,rinvsqH2,nul); fiOx = vec_madd(fsO,dOx,fiOx); /* +=fx */ dOx = vec_nmsub(fsO,dOx,nul); /* -fx */ fiOy = vec_madd(fsO,dOy,fiOy); /* +=fy */ dOy = vec_nmsub(fsO,dOy,nul); /* -fy */ fiOz = vec_madd(fsO,dOz,fiOz); /* +=fz */ dOz = vec_nmsub(fsO,dOz,nul); /* -fz */ fiH1x = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */ dOx = vec_nmsub(fsH1,dH1x,dOx); /* -fx */ fiH1y = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */ dOy = vec_nmsub(fsH1,dH1y,dOy); /* -fy */ fiH1z = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */ dOz = vec_nmsub(fsH1,dH1z,dOz); /* -fz */ fiH2x = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */ dOx = vec_nmsub(fsH2,dH2x,dOx); /* -fx */ fiH2y = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */ dOy = vec_nmsub(fsH2,dH2y,dOy); /* -fy */ fiH2z = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */ dOz = vec_nmsub(fsH2,dH2z,dOz); /* -fz */ fiMx = vec_madd(fsM,dMx,fiMx); /* +=fx */ dOx = vec_nmsub(fsM,dMx,dOx); /* -fx */ fiMy = vec_madd(fsM,dMy,fiMy); /* +=fy */ dOy = vec_nmsub(fsM,dMy,dOy); /* -fy */ fiMz = vec_madd(fsM,dMz,fiMz); /* +=fz */ dOz = vec_nmsub(fsM,dMz,dOz); /* -fz */ transpose_4_to_3(dOx,dOy,dOz,nul,&tmp1,&tmp2,&tmp3); add_xyz_to_mem(faction+j3a,tmp1); add_xyz_to_mem(faction+j3b,tmp2); add_xyz_to_mem(faction+j3c,tmp3); } else if(k<(nj1-1)) { jnra = jjnr[k]; jnrb = jjnr[k+1]; jnrc = jjnr[k+2]; j3a = 3*jnra; j3b = 3*jnrb; j3c = 3*jnrc; transpose_2_to_3(load_xyz(pos+j3a), load_xyz(pos+j3b),&dMx,&dMy,&dMz); dOx = vec_sub(iOx,dMx); dOy = vec_sub(iOy,dMy); dOz = vec_sub(iOz,dMz); dH1x = vec_sub(iH1x,dMx); dH1y = vec_sub(iH1y,dMy); dH1z = vec_sub(iH1z,dMz); dH2x = vec_sub(iH2x,dMx); dH2y = vec_sub(iH2y,dMy); dH2z = vec_sub(iH2z,dMz); dMx = vec_sub(iMx,dMx); dMy = vec_sub(iMy,dMy); dMz = vec_sub(iMz,dMz); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqM = vec_madd(dMx,dMx,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); 
rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqM = vec_madd(dMy,dMy,rsqM); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); rsqM = vec_madd(dMz,dMz,rsqM); rinvO = do_invsqrt(rsqO); do_3_invsqrt(rsqM,rsqH1,rsqH2,&rinvM,&rinvH1,&rinvH2); zero_highest_element_in_vector(&rsqO); zero_highest_element_in_vector(&rinvO); zero_highest_element_in_3_vectors(&rsqM,&rsqH1,&rsqH2); zero_highest_element_in_3_vectors(&rinvM,&rinvH1,&rinvH2); r = vec_madd(rinvO,rsqO,nul); rinvsqM = vec_madd(rinvM,rinvM,nul); rinvsqH1 = vec_madd(rinvH1,rinvH1,nul); rinvsqH2 = vec_madd(rinvH2,rinvH2,nul); tja = ntiA+2*type[jnra]; tjb = ntiA+2*type[jnrb]; tjc = ntiA+2*type[jnrc]; tjd = ntiA+2*type[jnrd]; /* load 2 j charges and multiply by iq */ jq=load_2_float(charge+jnra,charge+jnrb); load_2_pair(vdwparam+tja,vdwparam+tjb,&c6,&c12); do_2_ljtable_lj(VFtab,vec_madd(r,tsc,nul),&VVd,&FFd,&VVr,&FFr); Vvdwtot = vec_madd(c6,VVd,Vvdwtot); fsO = vec_nmsub(c6,FFd,nul); Vvdwtot = vec_madd(c12,VVr,Vvdwtot); fsO = vec_nmsub(c12,FFr,fsO); fsO = vec_madd(fsO,tsc,nul); fsO = vec_madd(fsO,rinvO,nul); qqM = vec_madd(iqM,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqM = vec_madd(vkrf,rsqM,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); fsM = vec_nmsub(vec_two(),krsqM,rinvM); vcoulM = vec_add(rinvM,krsqM); vcoulH1 = vec_add(rinvH1,krsqH1); fsM = vec_madd(qqM,fsM,nul); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulM = vec_sub(vcoulM,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqM,vcoulM,vctot); fsH1 = vec_nmsub(vec_two(),krsqH1,rinvH1); fsH2 = vec_nmsub(vec_two(),krsqH2,rinvH2); vctot = vec_madd(qqH,vcoulH1,vctot); fsM = vec_madd(fsM,rinvsqM,nul); fsH1 = vec_madd(fsH1,qqH,nul); fsH2 = vec_madd(fsH2,qqH,nul); vctot = vec_madd(qqH,vcoulH2,vctot); fsH1 = vec_madd(fsH1,rinvsqH1,nul); fsH2 = vec_madd(fsH2,rinvsqH2,nul); fiOx = vec_madd(fsO,dOx,fiOx); /* +=fx */ dOx = vec_nmsub(fsO,dOx,nul); /* -fx */ fiOy = 
vec_madd(fsO,dOy,fiOy); /* +=fy */ dOy = vec_nmsub(fsO,dOy,nul); /* -fy */ fiOz = vec_madd(fsO,dOz,fiOz); /* +=fz */ dOz = vec_nmsub(fsO,dOz,nul); /* -fz */ fiH1x = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */ dOx = vec_nmsub(fsH1,dH1x,dOx); /* -fx */ fiH1y = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */ dOy = vec_nmsub(fsH1,dH1y,dOy); /* -fy */ fiH1z = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */ dOz = vec_nmsub(fsH1,dH1z,dOz); /* -fz */ fiH2x = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */ dOx = vec_nmsub(fsH2,dH2x,dOx); /* -fx */ fiH2y = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */ dOy = vec_nmsub(fsH2,dH2y,dOy); /* -fy */ fiH2z = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */ dOz = vec_nmsub(fsH2,dH2z,dOz); /* -fz */ fiMx = vec_madd(fsM,dMx,fiMx); /* +=fx */ dOx = vec_nmsub(fsM,dMx,dOx); /* -fx */ fiMy = vec_madd(fsM,dMy,fiMy); /* +=fy */ dOy = vec_nmsub(fsM,dMy,dOy); /* -fy */ fiMz = vec_madd(fsM,dMz,fiMz); /* +=fz */ dOz = vec_nmsub(fsM,dMz,dOz); /* -fz */ transpose_4_to_2(dOx,dOy,dOz,nul,&tmp1,&tmp2); add_xyz_to_mem(faction+j3a,tmp1); add_xyz_to_mem(faction+j3b,tmp2); } else if(k<nj1) { jnra = jjnr[k]; jnrb = jjnr[k+1]; jnrc = jjnr[k+2]; j3a = 3*jnra; j3b = 3*jnrb; j3c = 3*jnrc; transpose_1_to_3(load_xyz(pos+j3a),&dMx,&dMy,&dMz); dOx = vec_sub(iOx,dMx); dOy = vec_sub(iOy,dMy); dOz = vec_sub(iOz,dMz); dH1x = vec_sub(iH1x,dMx); dH1y = vec_sub(iH1y,dMy); dH1z = vec_sub(iH1z,dMz); dH2x = vec_sub(iH2x,dMx); dH2y = vec_sub(iH2y,dMy); dH2z = vec_sub(iH2z,dMz); dMx = vec_sub(iMx,dMx); dMy = vec_sub(iMy,dMy); dMz = vec_sub(iMz,dMz); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqM = vec_madd(dMx,dMx,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqM = vec_madd(dMy,dMy,rsqM); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); rsqM = vec_madd(dMz,dMz,rsqM); rinvO = do_invsqrt(rsqO); do_3_invsqrt(rsqM,rsqH1,rsqH2,&rinvM,&rinvH1,&rinvH2); 
zero_highest_element_in_vector(&rsqO); zero_highest_element_in_vector(&rinvO); zero_highest_element_in_3_vectors(&rsqM,&rsqH1,&rsqH2); zero_highest_element_in_3_vectors(&rinvM,&rinvH1,&rinvH2); r = vec_madd(rinvO,rsqO,nul); rinvsqM = vec_madd(rinvM,rinvM,nul); rinvsqH1 = vec_madd(rinvH1,rinvH1,nul); rinvsqH2 = vec_madd(rinvH2,rinvH2,nul); tja = ntiA+2*type[jnra]; tjb = ntiA+2*type[jnrb]; tjc = ntiA+2*type[jnrc]; tjd = ntiA+2*type[jnrd]; /* load 1 j charge and multiply by iq */ jq=load_1_float(charge+jnra); load_1_pair(vdwparam+tja,&c6,&c12); do_1_ljtable_lj(VFtab,vec_madd(r,tsc,nul),&VVd,&FFd,&VVr,&FFr); Vvdwtot = vec_madd(c6,VVd,Vvdwtot); fsO = vec_nmsub(c6,FFd,nul); Vvdwtot = vec_madd(c12,VVr,Vvdwtot); fsO = vec_nmsub(c12,FFr,fsO); fsO = vec_madd(fsO,tsc,nul); fsO = vec_madd(fsO,rinvO,nul); qqM = vec_madd(iqM,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqM = vec_madd(vkrf,rsqM,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); fsM = vec_nmsub(vec_two(),krsqM,rinvM); vcoulM = vec_add(rinvM,krsqM); vcoulH1 = vec_add(rinvH1,krsqH1); fsM = vec_madd(qqM,fsM,nul); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulM = vec_sub(vcoulM,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqM,vcoulM,vctot); fsH1 = vec_nmsub(vec_two(),krsqH1,rinvH1); fsH2 = vec_nmsub(vec_two(),krsqH2,rinvH2); vctot = vec_madd(qqH,vcoulH1,vctot); fsM = vec_madd(fsM,rinvsqM,nul); fsH1 = vec_madd(fsH1,qqH,nul); fsH2 = vec_madd(fsH2,qqH,nul); vctot = vec_madd(qqH,vcoulH2,vctot); fsH1 = vec_madd(fsH1,rinvsqH1,nul); fsH2 = vec_madd(fsH2,rinvsqH2,nul); fiOx = vec_madd(fsO,dOx,fiOx); /* +=fx */ dOx = vec_nmsub(fsO,dOx,nul); /* -fx */ fiOy = vec_madd(fsO,dOy,fiOy); /* +=fy */ dOy = vec_nmsub(fsO,dOy,nul); /* -fy */ fiOz = vec_madd(fsO,dOz,fiOz); /* +=fz */ dOz = vec_nmsub(fsO,dOz,nul); /* -fz */ fiH1x = vec_madd(fsH1,dH1x,fiH1x); /* +=fx */ dOx = vec_nmsub(fsH1,dH1x,dOx); /* -fx */ fiH1y = vec_madd(fsH1,dH1y,fiH1y); /* +=fy */ dOy = 
vec_nmsub(fsH1,dH1y,dOy); /* -fy */ fiH1z = vec_madd(fsH1,dH1z,fiH1z); /* +=fz */ dOz = vec_nmsub(fsH1,dH1z,dOz); /* -fz */ fiH2x = vec_madd(fsH2,dH2x,fiH2x); /* +=fx */ dOx = vec_nmsub(fsH2,dH2x,dOx); /* -fx */ fiH2y = vec_madd(fsH2,dH2y,fiH2y); /* +=fy */ dOy = vec_nmsub(fsH2,dH2y,dOy); /* -fy */ fiH2z = vec_madd(fsH2,dH2z,fiH2z); /* +=fz */ dOz = vec_nmsub(fsH2,dH2z,dOz); /* -fz */ fiMx = vec_madd(fsM,dMx,fiMx); /* +=fx */ dOx = vec_nmsub(fsM,dMx,dOx); /* -fx */ fiMy = vec_madd(fsM,dMy,fiMy); /* +=fy */ dOy = vec_nmsub(fsM,dMy,dOy); /* -fy */ fiMz = vec_madd(fsM,dMz,fiMz); /* +=fz */ dOz = vec_nmsub(fsM,dMz,dOz); /* -fz */ transpose_3_to_1(dOx,dOy,dOz,&tmp1); add_xyz_to_mem(faction+j3a,tmp1); } /* update outer data */ update_i_4atoms_forces(faction+ii3,fshift+is3, fiOx,fiOy,fiOz,fiH1x,fiH1y,fiH1z, fiH2x,fiH2y,fiH2z, fiMx,fiMy,fiMz); add_vector_to_float(Vc+gid[n],vctot); add_vector_to_float(Vvdw+gid[n],Vvdwtot); ninner += nj1 - nj0; }#ifdef GMX_THREADS nouter += nn1 - nn0; } while (nn1<nri);#else nouter = nri;#endif *outeriter = nouter; *inneriter = ninner;}void nb_kernel233nf_ppc_altivec(int * p_nri, int iinr[], int jindex[], int jjnr[], int shift[], float shiftvec[], float fshift[], int gid[], float pos[], float faction[], float charge[], float * p_facel, float * p_krf, float * p_crf, float Vc[], int type[], int * p_ntype, float vdwparam[], float Vvdw[], float * p_tabscale, float VFtab[], float invsqrta[], float dvda[], float * p_gbtabscale, float GBtab[], int * p_nthreads, int * count, void * mtx, int * outeriter, int * inneriter, float * work){ vector float vkrf,vcrf; vector float iOx,iOy,iOz,iH1x,iH1y,iH1z,iH2x,iH2y,iH2z,iMx,iMy,iMz; vector float dOx,dOy,dOz,dH1x,dH1y,dH1z,dH2x,dH2y,dH2z,dMx,dMy,dMz; vector float vfacel,vcoulM,vcoulH1,vcoulH2,nul; vector float Vvdwtot,c6,c12,VVd,VVr,tsc,r; vector float vctot,qqM,qqH,iqM,iqH,jq,krsqM,krsqH1,krsqH2; vector float rinvO,rinvM,rinvH1,rinvH2,rinvsqH1,rinvsqH2,rinvsqM; vector float rsqO,rsqH1,rsqH2,rsqM; int 
n,k,ii,is3,ii3,ntiA,nj0,nj1; int jnra,jnrb,jnrc,jnrd; int j3a,j3b,j3c,j3d; int nri, ntype, nouter, ninner; int tja,tjb,tjc,tjd;#ifdef GMX_THREADS int nn0, nn1;#endif nouter = 0; ninner = 0;
⌨️ 快捷键说明
复制代码:Ctrl + C
搜索代码:Ctrl + F
全屏模式:F11
增大字号:Ctrl + =
减小字号:Ctrl + -
显示快捷键:?