📄 compute_abs_a.cc
字号:
int kl = 0; for(int k=0;k<nocc_act;k++) for(int l=0;l<=k;l++,kl++) { double pfac_kl = (k==l) ? oosqrt2 : 1.0; int kl_proc = kl%nproc_with_ints; if (kl_proc != proc_with_ints[me]) continue; int kl_aa = k*(k-1)/2 + l; int kl_ab = k*nocc_act + l; int lk_ab = l*nocc_act + k; // Get (|r12|) and (|[r12,T1]|) integrals only tim_enter("MO ints retrieve"); double *klox_buf_eri = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::eri); double *klox_buf_r12 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12); double *klox_buf_r12t1 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12t1); double *klox_buf_r12t2 = r12intsacc->retrieve_pair_block(k,l,R12IntsAcc::r12t2); double *lkox_buf_eri = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::eri); double *lkox_buf_r12 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12); double *lkox_buf_r12t1 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12t1); double *lkox_buf_r12t2 = r12intsacc->retrieve_pair_block(l,k,R12IntsAcc::r12t2); tim_exit("MO ints retrieve"); int ij = 0; for(int i=0;i<nocc_act;i++) for(int j=0;j<=i;j++,ij++) { double pfac_ij = (i==j) ? oosqrt2 : 1.0; int ij_aa = i*(i-1)/2 + j; int ij_ab = i*nocc_act + j; int ji_ab = j*nocc_act + i; tim_enter("MO ints retrieve"); double *ijox_buf_r12 = r12intsacc->retrieve_pair_block(i,j,R12IntsAcc::r12); double *jiox_buf_r12 = r12intsacc->retrieve_pair_block(j,i,R12IntsAcc::r12); tim_exit("MO ints retrieve"); double *Vaa_ij = Vaa_ijkl + ij_aa*naa; double *Vab_ij = Vab_ijkl + ij_ab*nab; double *Vab_ji = Vab_ijkl + ji_ab*nab; double *Xaa_ij = Xaa_ijkl + ij_aa*naa; double *Xab_ij = Xab_ijkl + ij_ab*nab; double *Xab_ji = Xab_ijkl + ji_ab*nab; double *Taa_ij = Taa_ijkl + ij_aa*naa; double *Tab_ij = Tab_ijkl + ij_ab*nab; double *Tab_ji = Tab_ijkl + ji_ab*nab; tim_enter("MO ints contraction"); double Vaa_ijkl, Vab_ijkl, Vab_jikl, Vab_ijlk, Vab_jilk; double Xaa_ijkl, Xab_ijkl, Xab_jikl, Xab_ijlk, Xab_jilk; double Taa_ijkl, Tab_ijkl, Tab_jikl, Tab_ijlk, Tab_jilk; Vaa_ijkl = Vab_ijkl = Vab_jikl = Vab_ijlk = Vab_jilk = 0.0; Xaa_ijkl = Xab_ijkl = Xab_jikl = Xab_ijlk = Xab_jilk = 0.0; Taa_ijkl = Tab_ijkl = Tab_jikl = Tab_ijlk = Tab_jilk = 0.0; for(int o=0;o<nocc;o++) { double pfac_xy = 1.0; for(int x=0;x<noso_aux;x++) { int ox_offset = o*noso_aux + x; double ij_r12_ox = ijox_buf_r12[ox_offset]; double ji_r12_ox = jiox_buf_r12[ox_offset]; double kl_eri_ox = klox_buf_eri[ox_offset]; double lk_eri_ox = lkox_buf_eri[ox_offset]; Vab_ijkl -= pfac_xy * (ij_r12_ox * kl_eri_ox + ji_r12_ox * lk_eri_ox); if (i != j) Vab_jikl -= pfac_xy * (ji_r12_ox * kl_eri_ox + ij_r12_ox * lk_eri_ox); if (k != l) Vab_ijlk -= pfac_xy * (ij_r12_ox * lk_eri_ox + ji_r12_ox * kl_eri_ox); if (i != j && k != l) { Vaa_ijkl -= pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_eri_ox - lk_eri_ox); Vab_jilk -= pfac_xy * (ij_r12_ox * kl_eri_ox + ji_r12_ox * lk_eri_ox); } double kl_r12_ox = klox_buf_r12[ox_offset]; double lk_r12_ox = lkox_buf_r12[ox_offset]; Xab_ijkl -= pfac_xy * (ij_r12_ox * kl_r12_ox + ji_r12_ox * lk_r12_ox); if (i != j) Xab_jikl -= pfac_xy * (ji_r12_ox * kl_r12_ox + ij_r12_ox * lk_r12_ox); if (k != l) Xab_ijlk -= pfac_xy * (ij_r12_ox * lk_r12_ox + ji_r12_ox * kl_r12_ox); if (i != j && k != l) { Xaa_ijkl -= pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_r12_ox - lk_r12_ox); Xab_jilk -= pfac_xy * (ij_r12_ox * kl_r12_ox + ji_r12_ox * lk_r12_ox); } double kl_r12t1_ox = klox_buf_r12t1[ox_offset]; double kl_r12t2_ox = klox_buf_r12t2[ox_offset]; double lk_r12t1_ox = lkox_buf_r12t1[ox_offset]; double lk_r12t2_ox = lkox_buf_r12t2[ox_offset]; double kl_Tr12_ox = -kl_r12t1_ox-kl_r12t2_ox; double lk_Tr12_ox = -lk_r12t1_ox-lk_r12t2_ox; Tab_ijkl += pfac_xy * (ij_r12_ox * kl_Tr12_ox + ji_r12_ox * lk_Tr12_ox); if (i != j) Tab_jikl += pfac_xy * (ji_r12_ox * kl_Tr12_ox + ij_r12_ox * lk_Tr12_ox); if (k != l) Tab_ijlk += pfac_xy * (ij_r12_ox * lk_Tr12_ox + ji_r12_ox * kl_Tr12_ox); if (i != j && k != l) { Taa_ijkl += pfac_xy * (ij_r12_ox - ji_r12_ox)*(kl_Tr12_ox - lk_Tr12_ox); Tab_jilk += pfac_xy * (ij_r12_ox * kl_Tr12_ox + ji_r12_ox * lk_Tr12_ox); } } } Vab_ij[kl_ab] += Vab_ijkl; if (i != j) Vab_ji[kl_ab] += Vab_jikl; if (k != l) Vab_ij[lk_ab] += Vab_ijlk; if (i != j && k != l) { Vaa_ij[kl_aa] += Vaa_ijkl; Vab_ji[lk_ab] += Vab_jilk; } Xab_ij[kl_ab] += Xab_ijkl; if (i != j) Xab_ji[kl_ab] += Xab_jikl; if (k != l) Xab_ij[lk_ab] += Xab_ijlk; if (i != j && k != l) { Xaa_ij[kl_aa] += Xaa_ijkl; Xab_ji[lk_ab] += Xab_jilk; } Tab_ij[kl_ab] += Tab_ijkl; if (i != j) Tab_ji[kl_ab] += Tab_jikl; if (k != l) Tab_ij[lk_ab] += Tab_ijlk; if (i != j && k != l) { Taa_ij[kl_aa] += Taa_ijkl; Tab_ji[lk_ab] += Tab_jilk; } tim_exit("MO ints contraction");#if PRINT_R12_INTERMED if (i != j && k != l) printf("Vaa[%d][%d] = %lf\n",ij_aa,kl_aa,Vaa_ij[kl_aa]); printf("Vab[%d][%d] = %lf\n",ij_ab,kl_ab,Vab_ij[kl_ab]); if (i != j) printf("Vab[%d][%d] = %lf\n",ji_ab,kl_ab,Vab_ji[kl_ab]); if (k != l) printf("Vab[%d][%d] = %lf\n",ij_ab,lk_ab,Vab_ij[lk_ab]); if (i != j && k != l) printf("Vab[%d][%d] = %lf\n",ji_ab,lk_ab,Vab_ji[lk_ab]); if (i != j && k != l) printf("Xaa[%d][%d] = %lf\n",ij_aa,kl_aa,Xaa_ij[kl_aa]); printf("Xab[%d][%d] = %lf\n",ij_ab,kl_ab,Xab_ij[kl_ab]); if (i != j) printf("Xab[%d][%d] = %lf\n",ji_ab,kl_ab,Xab_ji[kl_ab]); if (k != l) printf("Xab[%d][%d] = %lf\n",ij_ab,lk_ab,Xab_ij[lk_ab]); if (i != j && k != l) printf("Xab[%d][%d] = %lf\n",ji_ab,lk_ab,Xab_ji[lk_ab]); if (i != j && k != l) printf("Taa[%d][%d] = %lf\n",ij_aa,kl_aa,Taa_ij[kl_aa]); printf("Tab[%d][%d] = %lf\n",ij_ab,kl_ab,Tab_ij[kl_ab]); if (i != j) printf("Tab[%d][%d] = %lf\n",ji_ab,kl_ab,Tab_ji[kl_ab]); if (k != l) printf("Tab[%d][%d] = %lf\n",ij_ab,lk_ab,Tab_ij[lk_ab]); if (i != j && k != l) printf("Tab[%d][%d] = %lf\n",ji_ab,lk_ab,Tab_ji[lk_ab]);#endif r12intsacc->release_pair_block(i,j,R12IntsAcc::r12); r12intsacc->release_pair_block(j,i,R12IntsAcc::r12); } r12intsacc->release_pair_block(k,l,R12IntsAcc::eri); r12intsacc->release_pair_block(k,l,R12IntsAcc::r12); r12intsacc->release_pair_block(k,l,R12IntsAcc::r12t1); r12intsacc->release_pair_block(k,l,R12IntsAcc::r12t2); r12intsacc->release_pair_block(l,k,R12IntsAcc::eri); r12intsacc->release_pair_block(l,k,R12IntsAcc::r12); r12intsacc->release_pair_block(l,k,R12IntsAcc::r12t1); r12intsacc->release_pair_block(l,k,R12IntsAcc::r12t2); } } else { // tasks that have nothing to do should still create timers tim_enter("MO ints retrieve"); tim_exit("MO ints retrieve"); tim_enter("MO ints contraction"); tim_exit("MO ints contraction"); } delete[] proc_with_ints; tim_exit("mp2-r12a intermeds"); r12intsacc->deactivate(); if (debug_) ExEnv::out0() << indent << "Computed intermediates V, X, and T" << endl; if (nproc > 1) { // Use MemoryGrp to accumulate contributions to intermediates V, X, and T on all nodes msg->sum(Vaa_ijkl,naa*naa,0,-1); msg->sum(Vab_ijkl,nab*nab,0,-1); msg->sum(Xaa_ijkl,naa*naa,0,-1); msg->sum(Xab_ijkl,nab*nab,0,-1); msg->sum(Taa_ijkl,naa*naa,0,-1); msg->sum(Tab_ijkl,nab*nab,0,-1); } if (debug_) ExEnv::out0() << indent << "Gathered intermediates V, X, and T" << endl; // Add intermediates contribution to their global values for(int ij=0;ij<naa;ij++) for(int kl=0;kl<=ij;kl++) { int ijkl = ij*naa+kl; int klij = kl*naa+ij; double velem = Vaa->get_element(ij,kl) + Vaa_ijkl[ijkl]; Vaa->set_element(ij,kl,velem); if (ij != kl) { velem = Vaa->get_element(kl,ij) + Vaa_ijkl[klij]; Vaa->set_element(kl,ij,velem); } double xelem = Xaa->get_element(ij,kl) + Xaa_ijkl[ijkl]; Xaa->set_element(ij,kl,xelem); if (ij != kl) { xelem = Xaa->get_element(kl,ij) + Xaa_ijkl[klij]; Xaa->set_element(kl,ij,xelem); } double belem = Baa->get_element(ij,kl) + 0.5*(Taa_ijkl[ijkl] + Taa_ijkl[klij]); Baa->set_element(ij,kl,belem); Baa->set_element(kl,ij,belem); } for(int ij=0;ij<nab;ij++) for(int kl=0;kl<=ij;kl++) { int ijkl = ij*nab+kl; int klij = kl*nab+ij; double velem = Vab->get_element(ij,kl) + Vab_ijkl[ijkl]; Vab->set_element(ij,kl,velem); if (ij != kl) { velem = Vab->get_element(kl,ij) + Vab_ijkl[klij]; Vab->set_element(kl,ij,velem); } double xelem = Xab->get_element(ij,kl) + Xab_ijkl[ijkl]; Xab->set_element(ij,kl,xelem); if (ij != kl) { xelem = Xab->get_element(kl,ij) + Xab_ijkl[klij]; Xab->set_element(kl,ij,xelem); } double belem = Bab->get_element(ij,kl) + 0.5*(Tab_ijkl[ijkl] + Tab_ijkl[klij]); Bab->set_element(ij,kl,belem); Bab->set_element(kl,ij,belem); } msg->sum(aoint_computed); if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) { StateOutBin stateout(mole->checkpoint_file()); SavableState::save_state(mole,stateout); ExEnv::out0() << indent << "Checkpointed the wave function" << endl; }#if PRINT_BIGGEST_INTS biggest_ints_1.combine(msg); biggest_ints_2.combine(msg); biggest_ints_2s.combine(msg); biggest_ints_3a.combine(msg); biggest_ints_3.combine(msg);#endif#if PRINT_BIGGEST_INTS ExEnv::out0() << "biggest 1/4 transformed ints" << endl; for (int i=0; i<biggest_ints_1.ncontrib(); i++) { ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_1.indices(i)[0], biggest_ints_1.indices(i)[1], biggest_ints_1.indices(i)[2], biggest_ints_1.indices(i)[3], biggest_ints_1.val(i) ) << endl; } ExEnv::out0() << "biggest 2/4 transformed ints" << endl; for (int i=0; i<biggest_ints_2.ncontrib(); i++) { ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_2.indices(i)[0], biggest_ints_2.indices(i)[1], biggest_ints_2.indices(i)[2], biggest_ints_2.indices(i)[3], biggest_ints_2.val(i) ) << endl; } ExEnv::out0() << "restricted 2/4 transformed ints" << endl; for (int i=0; i<biggest_ints_2s.ncontrib(); i++) { ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_2s.indices(i)[0], biggest_ints_2s.indices(i)[1], biggest_ints_2s.indices(i)[2], biggest_ints_2s.indices(i)[3], biggest_ints_2s.val(i) ) << endl; } ExEnv::out0() << "biggest 3/4 transformed ints (in 3.)" << endl; for (int i=0; i<biggest_ints_3a.ncontrib(); i++) { ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_3a.indices(i)[0], biggest_ints_3a.indices(i)[1], biggest_ints_3a.indices(i)[2], biggest_ints_3a.indices(i)[3], biggest_ints_3a.val(i) ) << endl; } ExEnv::out0() << "biggest 3/4 transformed ints (in 4.)" << endl; for (int i=0; i<biggest_ints_3.ncontrib(); i++) { ExEnv::out0() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_3.indices(i)[0], biggest_ints_3.indices(i)[1], biggest_ints_3.indices(i)[2], biggest_ints_3.indices(i)[3], biggest_ints_3.val(i) ) << endl; }#endif // Print out various energies etc. if (debug_) { ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n" << indent << "would have been computed without bounds checking: " << npass*nshell*(nshell+1)*nshell*nshell_aux/2 << endl; ExEnv::out0() << indent << "Number of shell quartets for which AO integrals\n" << indent << "were computed: " << aoint_computed << endl; } /*-------------------------- Cleanup --------------------------*/ delete[] Vaa_ijkl; delete[] Vab_ijkl; delete[] Xaa_ijkl; delete[] Xab_ijkl; delete[] Taa_ijkl; delete[] Tab_ijkl; for (int i=0; i<thr->nthread(); i++) { delete e123thread[i]; } delete[] e123thread; delete[] tbints; tbints = 0; delete[] scf_vector; delete[] scf_vector_dat; delete[] evals; tim_exit("r12a-abs-mem"); evaluated_ = true; if (me == 0 && mole->if_to_checkpoint()) { StateOutBin stateout(mole->checkpoint_file()); SavableState::save_state(mole,stateout); ExEnv::out0() << indent << "Checkpointed the wave function" << endl; } return;}///////////////////////////////////////////////////////// Compute the batchsize for the transformation//// Only arrays allocated before exiting the loop over// i-batches are included here - only these arrays// affect the batch size.///////////////////////////////////////////////////////intR12IntEval_abs_A::compute_transform_batchsize_(size_t mem_alloc, size_t mem_static, int nocc_act, const int num_te_types){ // Check is have enough for even static objects size_t mem_dyn = 0; if (mem_alloc <= mem_static) return 0; else mem_dyn = mem_alloc - mem_static; // Determine if calculation is possible at all (i.e., if ni=1 possible) int ni = 1; distsize_t maxdyn = compute_transform_dynamic_memory_(ni, nocc_act, num_te_types); if (maxdyn > mem_dyn) { return 0; } ni = 2; while (ni<=nocc_act) { maxdyn = compute_transform_dynamic_memory_(ni, nocc_act, num_te_types); if (maxdyn >= mem_dyn) { ni--; break; } ni++; } if (ni > nocc_act) ni = nocc_act; return ni;}//////////////////////////////////////////////////////// Compute required (dynamic) memory// for a given batch size of the transformation//// Only arrays allocated before exiting the loop over// i-batches are included here - only these arrays// affect the batch size.//////////////////////////////////////////////////////distsize_tR12IntEval_abs_A::compute_transform_dynamic_memory_(int ni, int nocc_act, const int num_te_types){ int nproc = r12info()->msg()->n(); /////////////////////////////////////// // the largest memory requirement will // occur just before // the end of the i-batch loop (mem) /////////////////////////////////////// // compute nij as nij on node 0, since nij on node 0 is >= nij on other nodes int index = 0; int nij = 0; for (int i=0; i<ni; i++) { for (int j=0; j<nocc_act; j++) { if (index++ % nproc == 0) nij++; } } int nbasis = r12info()->basis()->nbasis(); int nfuncmax = r12info()->basis()->max_nfunction_in_shell(); int nbasis_aux = r12info()->basis_aux()->nbasis(); int nfuncmax_aux = r12info()->basis_aux()->max_nfunction_in_shell(); int nthread = r12info()->thr()->nthread(); int nocc = r12info()->nocc(); distsize_t memsize = sizeof(double)*(num_te_types*((distsize_t)nthread * ni * nbasis * nfuncmax * nfuncmax_aux // iqrs + (distsize_t)ni * nocc * nfuncmax_aux * nfuncmax_aux // ikrs + (distsize_t)nij * nocc * nbasis_aux // ikjs - buffer of 3 q.t. and higher // transformed integrals ) + (distsize_t)nocc * nfuncmax_aux // ks + (distsize_t)nocc * nbasis_aux // kx ); return memsize;}////////////////////////////////////////////////////////////////////////////// Local Variables:// mode: c++// c-file-style: "CLJ-CONDENSED"// End:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -