📄 csgrad.cc
字号:
if (debug_ && me == 0) { ExEnv::out0() << indent << "End of Pab and Wab" << endl; } // end of debug print compute_L: /////////////////////////////////////// // Update Waj and Laj with contrib. // from (oo|ov) and (ov|oo) integrals; // here a is active and j is active or // frozen /////////////////////////////////////// tim_enter("Waj and Laj"); // (oo|ov) contribution index = 0; ik_index = 0; for (i=0; i<ni; i++) { for (k=0; k<nocc; k++) { if (index++ % nproc == me) { if (k>=nfzc) { offset = nbasis*nocc + nbasis*nbasis*ik_index; for (j=0; j<nocc; j++) { for (b=0; b<nvir_act; b++) { ibka_ptr = &mo_int[b+nocc + offset]; ijkb_ptr = &mo_int[j + nbasis*b + offset]; if (dograd) waj_ptr = &Waj[j*nvir]; // order as j*nvir+a to make loops more efficient laj_ptr = &Laj[j*nvir]; for (a=0; a<nvir_act; a++) { tmpval = 2**ibka_ptr * *ijkb_ptr; ibka_ptr += nbasis; if (dograd) *waj_ptr++ += tmpval; *laj_ptr++ -= tmpval; // This term had the wrong sign in Frisch's paper } // exit a loop } // exit b loop } // exit j loop } // endif ik_index++; } // endif } // exit k loop } // exit i loop // (ov|oo) contribution index = 0; ik_index = 0; for (i=0; i<ni; i++) { for (k=0; k<nocc; k++) { if (index++ % nproc == me) { if (k>=nfzc) { offset = nocc + nbasis*nbasis*ik_index; for (b=0; b<nvir_act; b++) { for (j=0; j<nocc; j++) { ibkj_ptr = &mo_int[offset + b + j*nbasis]; ibka_ptr = &mo_int[offset + b + nocc*nbasis]; if (dograd) waj_ptr = &Waj[j*nvir]; laj_ptr = &Laj[j*nvir]; for (a=0; a<nvir_act; a++) { tmpval = 4 * *ibka_ptr * *ibkj_ptr; ibka_ptr += nbasis; if (dograd) *waj_ptr++ -= tmpval; *laj_ptr++ += tmpval; // This term had the wrong sign in Frisch's paper } // exit a loop } // exit j loop } // exit b loop } // endif ik_index++; } // endif } // exit k loop } // exit i loop tim_exit("Waj and Laj"); ///////////////////////////// // End of Waj and Laj update ///////////////////////////// // debug print if (debug_ && me == 0) { ExEnv::out0() << indent << "End of Paj and Waj" << endl; } // end of debug print mo_int = 0; mem->sync(); // Need to synchronize before deleting mo_intbuf mo_int = (double*) mem->localdata(); gamma_iajs_tmp = new double[nbasis*nvir_act]; if (!gamma_iajs_tmp) { ExEnv::outn() << indent << "Could not allocate gamma_iajs_tmp" << endl; abort(); } // debug print if (debug_ && me == 0) { ExEnv::out0() << indent << "Begin first and second q.b.t." << endl; } // end of debug print /////////////////////////////////////////////////////////// // Perform first and second quarter back-transformation. // Each node produces gamma_iajs, and gamma_iqjs // for a subset of i and j, all a and all s; // the back-transf. is done only for active i, j, a, and b /////////////////////////////////////////////////////////// // Begin first quarter back-transformation tim_enter("1. q.b.t."); index = 0; ij_index = 0; for (i=0; i<ni; i++) { for (j=0; j<nocc; j++) { if (index++ % nproc == me) { if (j>=nfzc) { bzerofast(gamma_iajs_tmp,nbasis*nvir_act); offset = nocc + nocc*nbasis + nbasis*nbasis*ij_index; for (a=0; a<nvir_act; a++) { for (s=0; s<nbasis; s++) { c_sb = &scf_vector[s][nocc]; gamma_iajs_ptr = &gamma_iajs_tmp[s*nvir_act + a]; ibja_ptr = &mo_int[a*nbasis + offset]; iajb_ptr = &mo_int[a + offset]; tmpval = 0.0; for (b=0; b<nvir_act; b++) { tmpval += 2**c_sb++ * (2**iajb_ptr - *ibja_ptr++); iajb_ptr += nbasis; } // exit b loop *gamma_iajs_ptr += tmpval; } // exit s loop } // exit a loop // Put gamma_iajs_tmp into mo_int for one i,j // while overwriting mo_int gamma_iajs_ptr = gamma_iajs_tmp; for (y=0; y<nbasis; y++) { iajy_ptr = &mo_int[nocc + nbasis*(y + nbasis*ij_index)]; for (a=0; a<nvir_act; a++) { *iajy_ptr++ = *gamma_iajs_ptr++; } } } // endif ij_index++; } // endif } // exit j loop } // exit i loop // end of first quarter back-transformation tim_exit("1. q.b.t."); // debug print if (debug_ && me == 0) { ExEnv::out0() << indent << "End of first q.b.t." << endl; } // end of debug print mo_int = 0; mem->sync(); // Make sure all nodes are done with gamma_iajs_tmp before renaming delete[] gamma_iajs_tmp; // The array mo_int has now been overwritten by the quarter // back-transformed non-sep 2PDM gamma_iajs, so rename gamma_iajs = (double*) mem->localdata(); gamma_iqjs_tmp = new double[nbasis]; if (!gamma_iqjs_tmp) { ExEnv::errn() << "Could not allocate gamma_iqjs_tmp" << endl; abort(); } if (debug_ && me == 0) { ExEnv::out0() << indent << "Begin second q.b.t." << endl; } // Begin second quarter back-transformation // (gamma_iqjs elements ordered as i,j,s,q, // i.e., q varies fastest) tim_enter("2. q.b.t."); index = 0; ij_index = 0; for (i=0; i<ni; i++) { for (j=0; j<nocc; j++) { if (index++ % nproc == me) { if (j>=nfzc) { offset = nbasis*nbasis*ij_index; for (s=0; s<nbasis; s++) { bzerofast(gamma_iqjs_tmp,nbasis); for (q=0; q<nbasis; q++) { gamma_iqjs_ptr = &gamma_iqjs_tmp[q]; gamma_iajs_ptr = &gamma_iajs[nocc + s*nbasis + offset]; c_qa = &scf_vector[q][nocc]; tmpval = 0.0; for (a=0; a<nvir_act; a++) { tmpval += *c_qa++ * *gamma_iajs_ptr++; } // exit a loop *gamma_iqjs_ptr += tmpval; } // exit q loop // Put gamma_iqjs_tmp into gamma_iajs for one i,j,s // while overwriting gamma_iajs gamma_iajs_ptr = &gamma_iajs[s*nbasis + offset]; gamma_iqjs_ptr = gamma_iqjs_tmp; for (q=0; q<nbasis; q++) { *gamma_iajs_ptr++ = *gamma_iqjs_ptr++; } } // exit s loop } // endif ij_index++; } // endif } // exit j loop } // exit i loop tim_exit("2. q.b.t."); // end of second quarter back-transformation if (debug_ && me == 0) { ExEnv::out0() << indent << "End of second q.b.t." << endl; } gamma_iajs = 0; mem->sync(); // Keep this here to make sure all nodes have gamma_iqjs // before it is needed below, and that gamma_iajs is not // deleted prematurely // The quarter back-transformed elements gamma_iajs have now been // overwritten by the half back-transformed elements gamma_iqjs delete[] gamma_iqjs_tmp; ///////////////////////////////////////////////// // End of 1. and 2. quarter back-transformation ///////////////////////////////////////////////// Lpi = new double[nbasis*ni]; bzerofast(Lpi,nbasis*ni); if (me == 0) { ExEnv::out0() << indent << "Begin third and fourth q.b.t." << endl; } ////////////////////////////////////////////////////////// // Perform third and fourth quarter back-transformation // and compute contribution to gradient from non-sep 2PDM ////////////////////////////////////////////////////////// tim_enter("3.qbt+4.qbt+non-sep contrib."); for (i=0; i<thr_->nthread(); i++) { qbt34thread[i]->set_i_offset(i_offset); qbt34thread[i]->set_ni(ni); thr_->add_thread(i,qbt34thread[i]);# if SINGLE_THREAD_QBT34 qbt34thread[i]->run();# endif }# if !SINGLE_THREAD_QBT34 thr_->start_threads(); thr_->wait_threads();# endif tim_exit("3.qbt+4.qbt+non-sep contrib."); // Add thread contributions to Lpi and ginter for (i=0; i<thr_->nthread(); i++) { double *Lpi_thread = qbt34thread[i]->get_Lpi(); double **ginter_thread = qbt34thread[i]->get_ginter(); for (j=0; j<nbasis*ni; j++) Lpi[j] += Lpi_thread[j]; for (j=0; j<natom; j++) { for (k=0; k<3; k++) { ginter[j][k] += ginter_thread[j][k]; } } aointder_computed += qbt34thread[i]->get_aointder_computed(); } if (me == 0) { ExEnv::out0() << indent << "End of third and fourth q.b.t." << endl; } mem->sync(); // Make sure all nodes are done before deleting arrays if (debug_ > 1) { RefSCDimension ni_dim(new SCDimension(ni,1)); ni_dim->blocks()->set_subdim(0, new SCDimension(ni)); RefSCDimension nbasis_dim(new SCDimension(nbasis,1)); nbasis_dim->blocks()->set_subdim(0, new SCDimension(nbasis)); RefSCMatrix Lpi_mat(nbasis_dim, ni_dim, kit); Lpi_mat->assign(Lpi); Lpi_mat.print("Lpi"); } if (debug_ && me == 0) { ExEnv::out0() << indent << "Back-transform Lpi" << endl; } // Back-transform Lpi to MO basis lpi_ptr = Lpi; for (p=0; p<nbasis; p++) { for (i=0; i<ni; i++) { c_pa = &scf_vector[p][nocc]; laj_ptr = &Laj[nvir*(i_offset + i)]; for (a=0; a<nvir; a++) { *laj_ptr++ += *c_pa++ * *lpi_ptr; } // exit a loop lpi_ptr++; } // exit i loop } // exit p loop// malloc_chain_check(1); delete[] Lpi; if (me == 0) { ExEnv::out0() << indent << "Done with pass " << pass+1 << endl; } } // exit loop over i-batches (pass) tim_exit("mp2 passes"); if (dograd || do_d1_) { for (i=0; i<thr_->nthread(); i++) { delete qbt34thread[i]; } delete[] qbt34thread; } mem->set_localsize(0); // debug print if (debug_ && me == 0) { ExEnv::out0() << indent << "Exited loop over i-batches" << endl; } // end of debug print /////////////////////////////////////////////////////////////// // The computation of the MP2 energy is now complete on each // node; add the nodes' contributions and print out the energy /////////////////////////////////////////////////////////////// msg_->sum(ecorr_mp2); msg_->sum(aoint_computed); msg_->sum(aointder_computed); biggest_coefs.combine(msg_);#if PRINT_BIGGEST_INTS biggest_ints_1.combine(msg_); biggest_ints_2.combine(msg_); biggest_ints_2s.combine(msg_); biggest_ints_3a.combine(msg_); biggest_ints_3.combine(msg_);#endif if (me == 0) { emp2 = escf + ecorr_mp2;#if PRINT_BIGGEST_INTS ExEnv::out0() << "biggest 1/4 transformed ints" << endl; for (i=0; i<biggest_ints_1.ncontrib(); i++) { ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_1.indices(i)[0], biggest_ints_1.indices(i)[1], biggest_ints_1.indices(i)[2], biggest_ints_1.indices(i)[3], biggest_ints_1.val(i) ) << endl; } ExEnv::outn() << "biggest 2/4 transformed ints" << endl; for (i=0; i<biggest_ints_2.ncontrib(); i++) { ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_2.indices(i)[0], biggest_ints_2.indices(i)[1], biggest_ints_2.indices(i)[2], biggest_ints_2.indices(i)[3], biggest_ints_2.val(i) ) << endl; } ExEnv::outn() << "restricted 2/4 transformed ints" << endl; for (i=0; i<biggest_ints_2s.ncontrib(); i++) { ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_2s.indices(i)[0], biggest_ints_2s.indices(i)[1], biggest_ints_2s.indices(i)[2], biggest_ints_2s.indices(i)[3], biggest_ints_2s.val(i) ) << endl; } ExEnv::outn() << "biggest 3/4 transformed ints (in 3.)" << endl; for (i=0; i<biggest_ints_3a.ncontrib(); i++) { ExEnv::outn() << scprintf("%3d %3d %3d %3d %16.12f", biggest_ints_3a.indices(i)[0], biggest_ints_3a.indices(i)[1],
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -