📄 hsosv1.cc
字号:
* each node needs its own copy of the vector; * * therefore, put a copy of the scf vector on each node; * * while doing this, duplicate columns corresponding to singly * * occupied orbitals and order columns as [socc docc socc unocc] */ /* also rearrange scf eigenvalues as [socc docc socc unocc] * * want socc first to get the socc's in the first batch * * (need socc's to compute energy denominators - see * * socc_sum comment below) */ evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double)); if (!evals_open) { ExEnv::errn() << "could not allocate storage for evals_open" << endl; abort(); } RefDiagSCMatrix occ; RefDiagSCMatrix evals; RefSCMatrix Scf_Vec; eigen(evals, Scf_Vec, occ); if (debug_>0) ExEnv::out0() << indent << "eigvenvectors computed" << endl; if (debug_>1) evals.print("eigenvalues"); if (debug_>2) Scf_Vec.print("eigenvectors"); double *scf_vectort_dat = new double[noso*nbasis]; Scf_Vec->convert(scf_vectort_dat); double** scf_vectort = new double*[nocc + nvir]; int idoc = 0, ivir = 0, isoc = 0; for (i=nfzc; i<noso-nfzv; i++) { if (occ(i) >= 2.0 - epsilon) { evals_open[idoc+nsocc] = evals(i); scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis]; idoc++; } else if (occ(i) >= 1.0 - epsilon) { evals_open[isoc] = evals(i); scf_vectort[isoc] = &scf_vectort_dat[i*nbasis]; evals_open[isoc+nocc] = evals(i); scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis]; isoc++; } else { if (ivir < nvir) { evals_open[ivir+nocc+nsocc] = evals(i); scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis]; } ivir++; } } // need the transpose of the vector if (debug_>0) ExEnv::out0() << indent << "allocating scf_vector" << endl; double **scf_vector = new double*[nbasis]; double *scf_vector_dat = new double[(nocc+nvir)*nbasis]; for (i=0; i<nbasis; i++) { scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i]; for (j=0; j<nocc+nvir; j++) { scf_vector[i][j] = scf_vectort[j][i]; } } delete[] scf_vectort; delete[] scf_vectort_dat; if (debug_>2) { ExEnv::out0() << indent << "Final eigenvalues and vectors" << endl; for (i=0; i<nocc+nvir; i++) { ExEnv::out0() << indent << evals_open[i]; for (j=0; j<nbasis; j++) { ExEnv::out0() << " " << scf_vector[j][i]; } ExEnv::out0()<< endl; } ExEnv::out0() << endl; } /* allocate storage for integral arrays */ if (debug_>0) ExEnv::out0() << indent << "allocating intermediates" << endl; dim_ij = nocc*ni - ni*(ni-1)/2; trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double)); trans_int2 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double)); trans_int3 = (double*) malloc(nbasis*a_number*dim_ij*sizeof(double)); trans_int4_node= (double*) malloc(nvir*a_number*sizeof(double)); trans_int4 = (double*) malloc(nvir*nvir*sizeof(double)); if (!(trans_int1 && trans_int2 && (!a_number || trans_int3) && (!a_number || trans_int4_node) && trans_int4)){ ExEnv::errn() << "could not allocate storage for integral arrays" << endl; abort(); } if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double)); if (nsocc) mo_int_do_so_vir = (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double)); if (nsocc) mo_int_tmp = (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double)); if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc)); // create the integrals object if (debug_>0) ExEnv::out0() << indent << "allocating integrals" << endl; integral()->set_storage(mem_remaining); Ref<TwoBodyInt> *tbint = new Ref<TwoBodyInt>[thr_->nthread()]; for (ithread=0; ithread<thr_->nthread(); ithread++) { tbint[ithread] = integral()->electron_repulsion(); } // set up the thread objects Ref<ThreadLock> lock = thr_->new_lock(); HSOSV1Erep1Qtr** e1thread = new HSOSV1Erep1Qtr*[thr_->nthread()]; for (ithread=0; ithread<thr_->nthread(); ithread++) { e1thread[ithread] = new HSOSV1Erep1Qtr(ithread, thr_->nthread(), me, nproc, lock, basis(), tbint[ithread], ni, scf_vector, tol, debug_); } if (debug_>0) ExEnv::out0() << indent << "beginning passes" << endl;/*************************************************************************** begin opt2 loops ****************************************************************************/ int work = ((nshell*(nshell+1))/2); int print_interval = work/100; if (print_interval == 0) print_interval = 1; if (work == 0) work = 1; for (pass=0; pass<npass; pass++) { if (debug_) { ExEnv::out0() << indent << "Beginning pass " << pass << endl; } int print_index = 0; i_offset= pass*ni + restart_orbital_v1_; if ((pass == npass - 1) && (rest != 0)) ni = rest; bzerofast(trans_int3,nbasis*a_number*dim_ij); tim_enter("RS loop"); for (R = 0; R < basis()->nshell(); R++) { nr = basis()->shell(R).nfunction(); for (S = 0; S <= R; S++) { ns = basis()->shell(S).nfunction(); tim_enter("bzerofast trans_int1"); bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni); tim_exit("bzerofast trans_int1"); if (debug_ && (print_index++)%print_interval == 0) { lock->lock(); ExEnv::outn() << scprintf("%d: (PQ|%d %d) %d%%", me,R,S,(100*print_index)/work) << endl; lock->unlock(); } tim_enter("PQ loop"); for (ithread=0; ithread<thr_->nthread(); ithread++) { e1thread[ithread]->set_data(R,nr,S,ns,ni,i_offset); thr_->add_thread(ithread,e1thread[ithread]); } thr_->start_threads(); thr_->wait_threads(); for (ithread=0; ithread<thr_->nthread(); ithread++) { e1thread[ithread]->accum_buffer(trans_int1); } tim_exit("PQ loop"); tim_enter("sum int"); msg_->sum(trans_int1,nr*ns*nbasis*ni,trans_int2); tim_exit("sum int"); /* begin second quarter transformation */ tim_enter("bzerofast trans_int2"); bzerofast(trans_int2,nfuncmax*nfuncmax*nbasis*ni); tim_exit("bzerofast trans_int2"); tim_enter("2. quart. tr."); for (bf3 = 0; bf3 < nr; bf3++) { for (bf4 = 0; bf4 < ns; bf4++) { if (R == S && bf4 > bf3) continue; for (q = 0; q < nbasis; q++) { c_qa = &scf_vector[q][nocc + a_offset]; iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni]; iars_ptr = &trans_int2[((bf4*nr + bf3)*a_number)*ni]; for (a = 0; a < a_number; a++) { for (i=ni; i; i--) { *iars_ptr++ += *c_qa * *iqrs++; } iqrs -= ni; c_qa++; } } } } tim_exit("2. quart. tr."); /* begin third quarter transformation */ tim_enter("3. quart. tr."); for (bf3 = 0; bf3<nr; bf3++) { r = basis()->shell_to_function(R) + bf3; for (bf4 = 0; bf4 <= (R == S ? bf3:(ns-1)); bf4++) { s = basis()->shell_to_function(S) + bf4; for (i=0; i<ni; i++) { tmp_index = i*(i+1)/2 + i*i_offset; for (a=0; a<a_number; a++) { iars = trans_int2[((bf4*nr + bf3)*a_number + a)*ni + i]; if (r == s) iars *= 0.5; iajs_ptr = &trans_int3[tmp_index + dim_ij*(a + a_number*s)]; iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + a_number*r)]; c_rj = scf_vector[r]; c_sj = scf_vector[s]; for (j=0; j<=i+i_offset; j++) { *iajs_ptr++ += *c_rj++ * iars; *iajr_ptr++ += *c_sj++ * iars; } } } } /* exit bf4 loop */ } /* exit bf3 loop */ tim_exit("3. quart. tr."); } /* exit S loop */ } /* exit R loop */ tim_exit("RS loop"); /* begin fourth quarter transformation; * * first tansform integrals with only s.o. indices; * * these integrals are needed to compute the denominators * * in the various terms contributing to the correlation energy * * and must all be computed in the first pass; * * the integrals are summed into the array socc_sum: * * socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) * * (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) * * the individual integrals are not saved here, only the sums are kept */ if (debug_) { ExEnv::out0() << indent << "Beginning 4. quarter transform" << endl; } tim_enter("4. quart. tr."); if (pass == 0 && me == 0) { if (nsocc) bzerofast(socc_sum,nsocc); for (isocc=0; isocc<nsocc; isocc++) { for (r=0; r<nbasis; r++) { for (asocc=0; asocc<nsocc; asocc++) { socc_sum[isocc] += scf_vector[r][nocc+asocc]* trans_int3[isocc*(isocc+1)/2 + isocc*i_offset + isocc + dim_ij*(asocc + a_number*r)]; } } } } tim_enter("bcast0 socc_sum"); if (nsocc) msg_->bcast(socc_sum,nsocc); tim_exit("bcast0 socc_sum"); tim_exit("4. quart. tr."); /* now we have all the sums of integrals involving s.o.'s (socc_sum); * * begin fourth quarter transformation for all integrals (including * * integrals with only s.o. indices); use restriction j <= (i_offset+i) * * to save flops */ compute_index = 0; for (i=0; i<ni; i++) { for (j=0; j <= (i_offset+i); j++) { tim_enter("4. quart. tr."); bzerofast(trans_int4_node,nvir*a_number); for (r=0; r<nbasis; r++) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -