📄 compute_abs_a.cc
字号:
if (bev->block(i).null()) continue; RefSCMatrix bev_block = bev->block(i); RefSCMatrix bU_block = bU->block(i); for (int j=0; j<nfunc[i]; j++) {// For some reason this produces a bunch of temp objects which are never destroyed// and default_matrixkit is not detroyed at the end// bev->block(i)->assign_column(// bU->block(i)->get_column(pm_index[ifunc]),j// ); int nk = bev_block->rowdim().n(); for (int k=0; k<nk; k++) bev_block->set_element(k,j,bU_block->get_element(k,pm_index[ifunc])); ifunc++; } } overlap_sqrt_eigval = so_matrixkit_aux->diagmatrix(osodim_aux); overlap_sqrt_eigval->assign(pm_sqrt); overlap_isqrt_eigval = so_matrixkit_aux->diagmatrix(osodim_aux); overlap_isqrt_eigval->assign(pm_isqrt); delete[] nfunc; delete[] pm_sqrt; delete[] pm_isqrt; delete[] pm_index; if (debug_ > 1) { overlap_aux.print("S aux"); overlap_aux_eigvec.print("S aux eigvec"); overlap_isqrt_eigval.print("s^(-1/2) aux eigval"); overlap_sqrt_eigval.print("s^(1/2) aux eigval"); } RefSCMatrix orthog_aux_so = overlap_isqrt_eigval * overlap_aux_eigvec.t(); orthog_aux_so = orthog_aux_so.t();#endif orthog_aux = pl_aux->evecs_to_AO_basis(orthog_aux_so); orthog_aux_so = 0; if (debug_ > 1) orthog_aux.print("Aux orthogonalizer"); RefSCMatrix tmp = orthog_aux.t() * pl_aux->to_AO_basis(overlap_aux) * orthog_aux; if (debug_ > 1) tmp.print("Xt * S * X (Aux)"); } int noso_aux = orthog_aux.coldim().n(); double *orthog_aux_vector = new double[nbasis_aux*noso_aux]; orthog_aux.convert(orthog_aux_vector); orthog_aux = 0; int *symorb_irrep_aux = new int[noso_aux]; int orbnum=0; for (int i=0; i<osodim_aux->blocks()->nblock(); i++) { for (int j=0; j<osodim_aux->blocks()->size(i); j++, orbnum++) { symorb_irrep_aux[orbnum] = i; } } ///////////////////////////////////// // Begin MP2 loops ///////////////////////////////////// // debug print if (debug_) { ExEnv::outn() << indent << scprintf("node %i, begin loop over i-batches",me) << endl; } // end of debug print // Initialize the integrals integral->set_storage(mem_remaining); Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr->nthread()]; for (int i=0; i<thr->nthread(); i++) { tbints[i] = integral->grt(); } ExEnv::out0() << indent << scprintf("Memory used for integral storage: %i Bytes", integral->storage_used()) << endl; if (mem.null()) { ExEnv::errn() << "MBPT2: memory group not initialized" << endl; abort(); } mem->set_localsize(num_te_types*nijmax*nocc*nbasis_aux*sizeof(double)); ExEnv::out0() << indent << "Size of global distributed array: " << mem->totalsize() << " Bytes" << endl; Ref<ThreadLock> lock = thr->new_lock(); R12A_ABS_123Qtr** e123thread = new R12A_ABS_123Qtr*[thr->nthread()]; for (int i=0; i<thr->nthread(); i++) { e123thread[i] = new R12A_ABS_123Qtr(i, thr->nthread(), me, nproc, mem, msg, lock, bs, bs_aux, tbints[i], nocc, nocc_act, scf_vector, tol, debug_, r12info()->dynamic()); } /////////////////////////////////////////////////////////// // Figure out which integrals accumulator should be used /////////////////////////////////////////////////////////// Ref<R12IntsAcc> r12intsacc; R12IntEvalInfo::StoreMethod ints_method = r12info()->ints_method(); char *r12ints_file = r12info()->ints_file(); bool restart = (restart_orbital_ > 0); switch (ints_method) { case R12IntEvalInfo::mem_only: if (restart) throw std::runtime_error("R12IntEval_abs_A::compute -- cannot use MemoryGrp-based accumulator when restarting"); ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl; r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc); break; case R12IntEvalInfo::mem_posix: if (npass == 1) { ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl; r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc); break; } // else use the next case case R12IntEvalInfo::posix: ExEnv::out0() << indent << "Will use POSIX I/O on node 0 to handle transformed integrals" << endl; r12intsacc = new R12IntsAcc_Node0File(mem,r12ints_file,num_te_types,nocc,nbasis_aux,nocc,nfzc,restart); break;#if HAVE_MPIIO case R12IntEvalInfo::mem_mpi: if (npass == 1) { ExEnv::out0() << indent << "Will hold transformed integrals in memory" << endl; r12intsacc = new R12IntsAcc_MemoryGrp(mem,num_te_types,nocc,nbasis_aux,nocc,nfzc); break; } // else use the next case case R12IntEvalInfo::mpi: ExEnv::out0() << indent << "Will use MPI-IO (individual I/O) to handle transformed integrals" << endl; r12intsacc = new R12IntsAcc_MPIIOFile_Ind(mem,r12ints_file,num_te_types,nocc,nbasis_aux,nocc,nfzc,restart); break;#endif default: throw std::runtime_error("R12IntEval_abs_A::compute -- invalid integrals store method"); } free(r12ints_file); /*----------------------------------- Start the integrals transformation -----------------------------------*/ tim_enter("mp2-r12/a passes"); if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) { StateOutBin stateout(mole->checkpoint_file()); SavableState::save_state(mole,stateout); ExEnv::out0() << indent << "Checkpointed the wave function" << endl; } for (int pass=0; pass<npass; pass++) { ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl; int i_offset = restart_orbital_ + pass*ni + nfzc; if ((pass == npass - 1) && (rest != 0)) ni = rest; // Compute number of of i,j pairs on each node during current pass for // two-el integrals int nij = 0; { int index = 0; for (int i=0; i<ni; i++) { for (int j=0; j<nocc_act; j++) { if (index++ % nproc == me) nij++; } } } if (debug_) ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl; // Allocate and initialize some arrays // (done here to avoid having these arrays // overlap with arrays allocated later) // Allocate (and initialize) some arrays double* integral_ijsk = (double*) mem->localdata(); bzerofast(integral_ijsk, (num_te_types*nij*nocc*nbasis_aux)); integral_ijsk = 0; mem->sync(); ExEnv::out0() << indent << scprintf("Begin loop over shells (grt, 1.+2. q.t.)") << endl; // Do the two electron integrals and the first three quarter transformations tim_enter("grt+1.qt+2.qt+3.qt"); for (int i=0; i<thr->nthread(); i++) { e123thread[i]->set_i_offset(i_offset); e123thread[i]->set_ni(ni); thr->add_thread(i,e123thread[i]);# if SINGLE_THREAD_E123 e123thread[i]->run();# endif }# if !SINGLE_THREAD_E123 thr->start_threads(); thr->wait_threads();# endif tim_exit("grt+1.qt+2.qt+3.qt"); ExEnv::out0() << indent << "End of loop over shells" << endl; mem->sync(); // Make sure ijsk is complete on each node before continuing integral_ijsk = (double*) mem->localdata();#if PRINT3Q if (me == 0) { int index = 0; int ij_index = 0; int j_offset = nfzc; for (int i = 0; i<ni; i++) { for (int j = 0; j<nocc_act; j++) { if (index++ % nproc == me) { double *ij_offset = integral_ijsk + num_te_types*nocc*nbasis_aux*ij_index; for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; ++te_type, ij_offset+=nocc*nbasis_aux) { double *ijsk_ptr = ij_offset; for (int s = 0; s<nbasis_aux; s++) { for (int k = 0; k<nocc; k++) { printf("3Q: type = %d (%d %d|%d %d) = %12.8f\n", te_type,i+i_offset,k,j+j_offset,s,*ijsk_ptr); ijsk_ptr++; } } } ij_index++; } } } }#endif double *kx_tmp = new double[nocc*noso_aux]; // in ikjx (stored as ijkx): i act; j act; k occ; x any MO. // Begin fourth quarter transformation ExEnv::out0() << indent << "Begin fourth q.t." << endl; tim_enter("4. q.t."); int index = 0; int ij_index = 0; for (int i=0; i<ni; i++) { for (int j=0; j<nocc_act; j++) { if (index++ % nproc == me) { double *ij_offset = integral_ijsk + num_te_types*nocc*nbasis_aux*ij_index; for(int te_type=0; te_type<num_te_types; te_type++,ij_offset+=nocc*nbasis_aux) { bzerofast(kx_tmp, nocc*noso_aux); double *ijs_offset = ij_offset; int sx = 0; for (int s=0; s<nbasis_aux; s++, ijs_offset+=nocc) { for (int x=0; x<noso_aux; x++, ++sx) { double c_sx = orthog_aux_vector[sx]; double *kx_ptr = kx_tmp + x; double *sk_ptr = ijs_offset; for (int k=0; k<nocc; ++k) { *kx_ptr += c_sx * *sk_ptr; ++sk_ptr; kx_ptr += noso_aux; } // end of k loop } // end of x loop } // end of s loop // Put kx_tmp into ijsk for one i,j while // overwriting elements of ijsk double *ijsk_ptr = ij_offset; double *ijkx_ptr = kx_tmp; int nkx = nocc*noso_aux; for (int kx=0; kx<nkx; ++kx) { *ijsk_ptr = *ijkx_ptr; ++ijsk_ptr; ++ijkx_ptr; } } // end of te_type loop ij_index++; } // endif } // exit j loop } // exit i loop // end of fourth quarter transformation tim_exit("4. q.t."); ExEnv::out0() << indent << "End of fourth q.t." << endl; // The array integral_ijsk has now been overwritten by MO integrals ijkx // rename the array mo_int double* mo_int = integral_ijsk; delete[] kx_tmp; // Zero out nonsymmetric integrals { int index = 0; int ij_index = 0; int j_offset = nfzc; for (int i = 0; i<ni; i++) { for (int j = 0; j<nocc_act; j++) { if (index++ % nproc == me) { double *ij_offset = mo_int + num_te_types*nocc*nbasis_aux*ij_index; for(int te_type=0; te_type<num_te_types; ++te_type,ij_offset+=nocc*nbasis_aux) { double *ijkx_ptr = ij_offset; for (int k=0; k<nocc; ++k) { for (int x=0; x<noso_aux; ++x) { if (( mo_irrep[i+i_offset] ^ mo_irrep[j+j_offset] ^ mo_irrep[k] ^ symorb_irrep_aux[x]) ) { *ijkx_ptr = 0.0; } ijkx_ptr++; } } } ij_index++; } } } }#if PRINT4Q if (me == 0) { int index = 0; int ij_index = 0; int j_offset = nfzc; for (int i = 0; i<ni; i++) { for (int j = 0; j<nocc_act; j++) { if (index++ % nproc == me) { double *ij_offset = mo_int + num_te_types*nocc*nbasis_aux*ij_index; for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++,ij_offset+=nocc*nbasis_aux) { double *ijkx_ptr = ij_offset; for (int k=0; k<nocc; ++k) { for (int x=0; x<noso_aux; ++x) { printf("4Q: type = %d (%d %d|%d %d) = %12.8f\n", te_type,i+i_offset,k,j+j_offset,x,*ijkx_ptr); ijkx_ptr++; } } } ij_index++; } } } }#endif integral_ijsk = 0; mem->sync(); // Make sure MO integrals are complete on all nodes before continuing // Push locally stored integrals to an accumulator // This could involve storing the data to disk or simply remembering the pointer tim_enter("MO ints store"); r12intsacc->store_memorygrp(mem,ni); tim_exit("MO ints store"); mem->sync(); if (me == 0 && mole->if_to_checkpoint() && r12intsacc->can_restart()) { current_orbital_ += ni; StateOutBin stateout(mole->checkpoint_file()); SavableState::save_state(mole,stateout); ExEnv::out0() << indent << "Checkpointed the wave function" << endl; } } // end of loop over passes tim_exit("mp2-r12/a passes"); if (debug_) ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl; // Done storing integrals - commit the content // WARNING: it is not safe to use mem until deactivate has been called on the accumulator // After that deactivate the size of mem will be 0 [mem->set_localsize(0)] r12intsacc->commit(); /*-------------------------------- Compute MP2-R12/A intermediates and collect on node0 --------------------------------*/ tim_enter("mp2-r12a intermeds"); int naa = (nocc_act*(nocc_act-1))/2; // Number of alpha-alpha pairs int nab = nocc_act*nocc_act; // Number of alpha-beta pairs if (debug_) { ExEnv::out0() << indent << "naa = " << naa << endl; ExEnv::out0() << indent << "nab = " << nab << endl; } double *Vaa_ijkl = new double[naa*naa]; double *Xaa_ijkl = new double[naa*naa]; double *Taa_ijkl = new double[naa*naa]; double *Vab_ijkl = new double[nab*nab]; double *Xab_ijkl = new double[nab*nab]; double *Tab_ijkl = new double[nab*nab]; if (debug_) ExEnv::out0() << indent << "Allocated intermediates V, X, and T" << endl; bzerofast(Vaa_ijkl,naa*naa); bzerofast(Xaa_ijkl,naa*naa); bzerofast(Taa_ijkl,naa*naa); bzerofast(Vab_ijkl,nab*nab); bzerofast(Xab_ijkl,nab*nab); bzerofast(Tab_ijkl,nab*nab); // Compute intermediates if (debug_) ExEnv::out0() << indent << "Ready to compute intermediates V, X, and T" << endl; const double oosqrt2 = 1.0/sqrt(2.0); // Compute the number of tasks that have full access to the integrals // and split the work among them int nproc_with_ints = 0; for(int proc=0;proc<nproc;proc++) if (r12intsacc->has_access(proc)) nproc_with_ints++; int *proc_with_ints = new int[nproc]; int count = 0; for(int proc=0;proc<nproc;proc++) if (r12intsacc->has_access(proc)) { proc_with_ints[proc] = count; count++; } else proc_with_ints[proc] = -1; if (debug_) ExEnv::out0() << indent << "Computing intermediates on " << nproc_with_ints << " processors" << endl; ////////////////////////////////////////////////////////////// // // Evaluation of the intermediates proceeds as follows: // // loop over batches of kl, k >= l, 0<=k,l<nocc_act // load (kl|xy), (kl| [T1,r12] |xy), and (lk| [T1,r12] |xy) // (aka kl-sets) into memory // // loop over batches of ij, i>=j, 0<=i,j<nocc_act // load (ij|r12|xy) into memory // (aka ij-sets) into memory // compute V[ij][kl] and T[ij][kl] for all ij and kl in // the "direct product" batch // end ij loop // end kl loop // ///////////////////////////////////////////////////////////////////////////////// if (r12intsacc->has_access(me)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -