nb_kernel211_ppc_altivec.c
来自「最著名最快的分子模拟软件」· C语言 代码 · 共 869 行 · 第 1/3 页
C
869 行
vector float rinvO,rinvH1,rinvH2,rinvsqO,rsqO,rsqH1,rsqH2; int n,k,ii,is3,ii3,ntiA,nj0,nj1; int jnra,jnrb,jnrc,jnrd; int j3a,j3b,j3c,j3d; int nri, ntype, nouter, ninner; int tja,tjb,tjc,tjd;#ifdef GMX_THREADS int nn0, nn1;#endif nouter = 0; ninner = 0; nri = *p_nri; ntype = *p_ntype; nul=vec_zero(); vfacel=load_float_and_splat(p_facel); vkrf=load_float_and_splat(p_krf); vcrf=load_float_and_splat(p_crf); ii = iinr[0]; iqO = vec_madd(load_float_and_splat(charge+ii),vfacel,nul); iqH = vec_madd(load_float_and_splat(charge+ii+1),vfacel,nul); ntiA = 2*ntype*type[ii]; #ifdef GMX_THREADS nthreads = *p_nthreads; do { gmx_thread_mutex_lock((gmx_thread_mutex_t *)mtx); nn0 = *count; nn1 = nn0+(nri-nn0)/(2*nthreads)+3; *count = nn1; gmx_thread_mutex_unlock((gmx_thread_mutex_t *)mtx); if(nn1>nri) nn1=nri; for(n=nn0; (n<nn1); n++) {#if 0 } /* maintain correct indentation even with conditional left braces */#endif#else /* without gmx_threads */ for(n=0;n<nri;n++) {#endif is3 = 3*shift[n]; ii = iinr[n]; ii3 = 3*ii; load_1_3atoms_shift_and_splat(pos+ii3,shiftvec+is3,&iOx,&iOy,&iOz, &iH1x,&iH1y,&iH1z,&iH2x,&iH2y,&iH2z); vctot = nul; Vvdwtot = nul; nj0 = jindex[n]; nj1 = jindex[n+1]; for(k=nj0; k<(nj1-3); k+=4) { jnra = jjnr[k]; jnrb = jjnr[k+1]; jnrc = jjnr[k+2]; jnrd = jjnr[k+3]; j3a = 3*jnra; j3b = 3*jnrb; j3c = 3*jnrc; j3d = 3*jnrd; transpose_4_to_3(load_xyz(pos+j3a), load_xyz(pos+j3b), load_xyz(pos+j3c), load_xyz(pos+j3d),&dH2x,&dH2y,&dH2z); dOx = vec_sub(iOx,dH2x); dOy = vec_sub(iOy,dH2y); dOz = vec_sub(iOz,dH2z); dH1x = vec_sub(iH1x,dH2x); dH1y = vec_sub(iH1y,dH2y); dH1z = vec_sub(iH1z,dH2z); dH2x = vec_sub(iH2x,dH2x); dH2y = vec_sub(iH2y,dH2y); dH2z = vec_sub(iH2z,dH2z); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2); rinvsqO = vec_madd(rinvO,rinvO,nul); rinvsix = vec_madd(rinvsqO,rinvsqO,nul); rinvsix = vec_madd(rinvsix,rinvsqO,nul); tja = ntiA+2*type[jnra]; tjb = ntiA+2*type[jnrb]; tjc = ntiA+2*type[jnrc]; tjd = ntiA+2*type[jnrd]; /* load 4 j charges and multiply by iq */ jq=load_4_float(charge+jnra,charge+jnrb, charge+jnrc,charge+jnrd); load_4_pair(vdwparam+tja,vdwparam+tjb,vdwparam+tjc,vdwparam+tjd,&c6,&c12); qqO = vec_madd(iqO,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqO = vec_madd(vkrf,rsqO,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); Vvdwtot = vec_nmsub(c6,rinvsix,Vvdwtot); Vvdwtot = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul), Vvdwtot); vcoulO = vec_add(rinvO,krsqO); vcoulH1 = vec_add(rinvH1,krsqH1); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulO = vec_sub(vcoulO,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqO,vcoulO,vctot); vctot = vec_madd(qqH,vcoulH1,vctot); vctot = vec_madd(qqH,vcoulH2,vctot); } if(k<(nj1-2)) { jnra = jjnr[k]; jnrb = jjnr[k+1]; jnrc = jjnr[k+2]; j3a = 3*jnra; j3b = 3*jnrb; j3c = 3*jnrc; transpose_4_to_3(load_xyz(pos+j3a), load_xyz(pos+j3b), load_xyz(pos+j3c),nul,&dH2x,&dH2y,&dH2z); dOx = vec_sub(iOx,dH2x); dOy = vec_sub(iOy,dH2y); dOz = vec_sub(iOz,dH2z); dH1x = vec_sub(iH1x,dH2x); dH1y = vec_sub(iH1y,dH2y); dH1z = vec_sub(iH1z,dH2z); dH2x = vec_sub(iH2x,dH2x); dH2y = vec_sub(iH2y,dH2y); dH2z = vec_sub(iH2z,dH2z); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); zero_highest_element_in_3_vectors(&rsqO,&rsqH1,&rsqH2); do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2); zero_highest_element_in_3_vectors(&rinvO,&rinvH1,&rinvH2); rinvsqO = vec_madd(rinvO,rinvO,nul); rinvsix = vec_madd(rinvsqO,rinvsqO,nul); rinvsix = vec_madd(rinvsix,rinvsqO,nul); jq=load_3_float(charge+jnra,charge+jnrb,charge+jnrc); tja = ntiA+2*type[jnra]; tjb = ntiA+2*type[jnrb]; tjc = ntiA+2*type[jnrc]; load_3_pair(vdwparam+tja,vdwparam+tjb,vdwparam+tjc,&c6,&c12); qqO = vec_madd(iqO,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqO = vec_madd(vkrf,rsqO,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); Vvdwtot = vec_nmsub(c6,rinvsix,Vvdwtot); Vvdwtot = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul), Vvdwtot); vcoulO = vec_add(rinvO,krsqO); vcoulH1 = vec_add(rinvH1,krsqH1); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulO = vec_sub(vcoulO,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqO,vcoulO,vctot); vctot = vec_madd(qqH,vcoulH1,vctot); vctot = vec_madd(qqH,vcoulH2,vctot); } else if(k<(nj1-1)) { jnra = jjnr[k]; jnrb = jjnr[k+1]; j3a = 3*jnra; j3b = 3*jnrb; transpose_2_to_3(load_xyz(pos+j3a), load_xyz(pos+j3b),&dH2x,&dH2y,&dH2z); dOx = vec_sub(iOx,dH2x); dOy = vec_sub(iOy,dH2y); dOz = vec_sub(iOz,dH2z); dH1x = vec_sub(iH1x,dH2x); dH1y = vec_sub(iH1y,dH2y); dH1z = vec_sub(iH1z,dH2z); dH2x = vec_sub(iH2x,dH2x); dH2y = vec_sub(iH2y,dH2y); dH2z = vec_sub(iH2z,dH2z); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); zero_highest_2_elements_in_3_vectors(&rsqO,&rsqH1,&rsqH2); do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2); zero_highest_2_elements_in_3_vectors(&rinvO,&rinvH1,&rinvH2); rinvsqO = vec_madd(rinvO,rinvO,nul); rinvsix = vec_madd(rinvsqO,rinvsqO,nul); rinvsix = vec_madd(rinvsix,rinvsqO,nul); tja = ntiA+2*type[jnra]; tjb = ntiA+2*type[jnrb]; /* load 2 j charges and multiply by iq */ jq=load_2_float(charge+jnra,charge+jnrb); load_2_pair(vdwparam+tja,vdwparam+tjb,&c6,&c12); qqO = vec_madd(iqO,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqO = vec_madd(vkrf,rsqO,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); Vvdwtot = vec_nmsub(c6,rinvsix,Vvdwtot); Vvdwtot = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul), Vvdwtot); vcoulO = vec_add(rinvO,krsqO); vcoulH1 = vec_add(rinvH1,krsqH1); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulO = vec_sub(vcoulO,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqO,vcoulO,vctot); vctot = vec_madd(qqH,vcoulH1,vctot); vctot = vec_madd(qqH,vcoulH2,vctot); } else if(k<nj1) { jnra = jjnr[k]; j3a = 3*jnra; transpose_1_to_3(load_xyz(pos+j3a),&dH2x,&dH2y,&dH2z); dOx = vec_sub(iOx,dH2x); dOy = vec_sub(iOy,dH2y); dOz = vec_sub(iOz,dH2z); dH1x = vec_sub(iH1x,dH2x); dH1y = vec_sub(iH1y,dH2y); dH1z = vec_sub(iH1z,dH2z); dH2x = vec_sub(iH2x,dH2x); dH2y = vec_sub(iH2y,dH2y); dH2z = vec_sub(iH2z,dH2z); rsqO = vec_madd(dOx,dOx,nul); rsqH1 = vec_madd(dH1x,dH1x,nul); rsqH2 = vec_madd(dH2x,dH2x,nul); rsqO = vec_madd(dOy,dOy,rsqO); rsqH1 = vec_madd(dH1y,dH1y,rsqH1); rsqH2 = vec_madd(dH2y,dH2y,rsqH2); rsqO = vec_madd(dOz,dOz,rsqO); rsqH1 = vec_madd(dH1z,dH1z,rsqH1); rsqH2 = vec_madd(dH2z,dH2z,rsqH2); zero_highest_3_elements_in_3_vectors(&rsqO,&rsqH1,&rsqH2); do_3_invsqrt(rsqO,rsqH1,rsqH2,&rinvO,&rinvH1,&rinvH2); zero_highest_3_elements_in_3_vectors(&rinvO,&rinvH1,&rinvH2); rinvsqO = vec_madd(rinvO,rinvO,nul); rinvsix = vec_madd(rinvsqO,rinvsqO,nul); rinvsix = vec_madd(rinvsix,rinvsqO,nul); tja = ntiA+2*type[jnra]; /* load 1 j charges and multiply by iq */ jq=load_1_float(charge+jnra); load_1_pair(vdwparam+tja,&c6,&c12); qqO = vec_madd(iqO,jq,nul); qqH = vec_madd(iqH,jq,nul); krsqO = vec_madd(vkrf,rsqO,nul); krsqH1 = vec_madd(vkrf,rsqH1,nul); krsqH2 = vec_madd(vkrf,rsqH2,nul); Vvdwtot = vec_nmsub(c6,rinvsix,Vvdwtot); Vvdwtot = vec_madd(c12,vec_madd(rinvsix,rinvsix,nul), Vvdwtot); vcoulO = vec_add(rinvO,krsqO); vcoulH1 = vec_add(rinvH1,krsqH1); vcoulH2 = vec_add(rinvH2,krsqH2); vcoulO = vec_sub(vcoulO,vcrf); vcoulH1 = vec_sub(vcoulH1,vcrf); vcoulH2 = vec_sub(vcoulH2,vcrf); vctot = vec_madd(qqO,vcoulO,vctot); vctot = vec_madd(qqH,vcoulH1,vctot); vctot = vec_madd(qqH,vcoulH2,vctot); } /* update outer data */ add_vector_to_float(Vc+gid[n],vctot); add_vector_to_float(Vvdw+gid[n],Vvdwtot); ninner += nj1 - nj0; }#ifdef GMX_THREADS nouter += nn1 - nn0; } while (nn1<nri);#else nouter = nri;#endif *outeriter = nouter; *inneriter = ninner;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?