📄 hsosv2lb.cc
字号:
if (me == 0) { ExEnv::out0() << indent << " npass rest nbasis nshell nfuncmax" " ndocc nsocc nvir nfzc nfzv" << endl; ExEnv::out0() << indent << scprintf(" %-4i %-3i %-5i %-4i %-3i" " %-3i %-3i %-3i %-3i %-3i\n", npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv); ExEnv::out0() << indent << scprintf("Using %i bytes of memory",mem_alloc) << endl; } ////////////////////// // Test that ni is OK ////////////////////// if (me == 0) { ExEnv::out0() << indent << scprintf("Memory allocated: %i", mem_alloc) << endl; ExEnv::out0() << indent << scprintf("Memory used : %lf", A*ni*ni+B*ni+C) << endl; if (A*ni*ni + B*ni +C > mem_alloc) { ExEnv::err0() << "Problems with memory allocation: " << "Using more memory than allocated" << endl; abort(); } } ////////////////////////////////////////////////////////////////// // The scf vector might be distributed between the nodes, // but for OPT2 each node needs its own copy of the vector; // therefore, put a copy of the scf vector on each node; // while doing this, duplicate columns corresponding to singly // occupied orbitals and order columns as [socc docc socc unocc] // Also rearrange scf eigenvalues as [socc docc socc unocc] // want socc first to get the socc's in the first batch // (need socc's to compute energy denominators - see // socc_sum comment below) ///////////////////////////////////////////////////////// evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double)); RefDiagSCMatrix occ; RefDiagSCMatrix evals; RefSCMatrix Scf_Vec; eigen(evals, Scf_Vec, occ); if (debug_) { evals.print("eigenvalues"); Scf_Vec.print("eigenvectors"); } double *scf_vectort_dat = new double[nbasis*noso]; Scf_Vec->convert(scf_vectort_dat); double** scf_vectort = new double*[nocc + nvir]; int idoc = 0, ivir = 0, isoc = 0; for (i=nfzc; i<noso-nfzv; i++) { if (occ(i) >= 2.0 - epsilon) { evals_open[idoc+nsocc] = evals(i); scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis]; idoc++; } else if (occ(i) >= 1.0 - epsilon) { evals_open[isoc] = evals(i); scf_vectort[isoc] = &scf_vectort_dat[i*nbasis]; evals_open[isoc+nocc] = evals(i); scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis]; isoc++; } else { if (ivir < nvir) { evals_open[ivir+nocc+nsocc] = evals(i); scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis]; } ivir++; } } // need the transpose of the vector double **scf_vector = new double*[nbasis]; double *scf_vector_dat = new double[(nocc+nvir)*nbasis]; for (i=0; i<nbasis; i++) { scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i]; for (j=0; j<nocc+nvir; j++) { scf_vector[i][j] = scf_vectort[j][i]; } } delete[] scf_vectort; delete[] scf_vectort_dat; //////////////////////////////////////// // Allocate storage for various arrays //////////////////////////////////////// dim_ij = nocc*ni - ni*(ni - 1)/2; trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double)); trans_int2 = (double*) malloc(nvir*ni*sizeof(double)); trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double)); trans_int4 = (double*) malloc(nvir*nvir*sizeof(double)); trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double)); if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double)); if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double)); if (nsocc) mo_int_do_so_vir = (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double)); if (nsocc) mo_int_tmp = (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double)); if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc)); // create the integrals object integral()->set_storage(mem_remaining); tbint_ = integral()->electron_repulsion(); intbuf = tbint_->buffer(); ///////////////////////////////////// // Begin opt2 loops ///////////////////////////////////// for (pass=0; pass<npass; pass++) { i_offset = pass*ni; if ((pass == npass - 1) && (rest != 0)) ni = rest; r_offset = 0; bzerofast(trans_int3,nbf[me]*nvir*dim_ij); tim_enter("RS loop"); for (imyshell=0; imyshell<nRshell; imyshell++) { R = myshells[imyshell]; nr = myshellsizes[imyshell]; for (S = 0; S < nshell; S++) { ns = basis()->shell(S).nfunction(); tim_enter("bzerofast trans_int1"); bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni); tim_exit("bzerofast trans_int1"); tim_enter("PQ loop"); for (P = 0; P < nshell; P++) { np = basis()->shell(P).nfunction(); for (Q = 0; Q <= P; Q++) { if (tbint_->log2_shell_bound(P,Q,R,S) < tol) { continue; // skip ereps less than tol } aoint_computed++; nq = basis()->shell(Q).nfunction(); tim_enter("erep"); tbint_->compute_shell(P,Q,R,S); tim_exit("erep"); tim_enter("1. quart. tr."); for (bf1 = 0; bf1 < np; bf1++) { p = basis()->shell_to_function(P) + bf1; for (bf2 = 0; bf2 < nq; bf2++) { q = basis()->shell_to_function(Q) + bf2; if (q > p) { // if q > p: want to skip the loops over bf3-4 // and larger bf2 values, so increment bf1 by 1 // ("break") break; } for (bf3 = 0; bf3 < nr; bf3++) { bf3_offset = 0; if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell]; for (bf4 = 0; bf4 < ns; bf4++) { index = bf4 + ns*(bf3+bf3_offset + basis()->shell(R).nfunction()*(bf2 + nq*bf1)); if (fabs(intbuf[index]) > 1.0e-15) { pqrs = intbuf[index]; iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni]; iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni]; if (p == q) pqrs *= 0.5; col_index = i_offset; c_pi = &scf_vector[p][col_index]; c_qi = &scf_vector[q][col_index]; for (i=ni; i; i--) { *iqrs++ += pqrs * *c_pi++; *iprs++ += pqrs * *c_qi++; } } } // exit bf4 loop } // exit bf3 loop } // exit bf2 loop } // exit bf1 loop tim_exit("1. quart. tr."); } // exit Q loop } // exit P loop tim_exit("PQ loop"); // Begin second and third quarter transformations for (bf3 = 0; bf3 < nr; bf3++) { r = r_offset + bf3; for (bf4 = 0; bf4 < ns; bf4++) { s = basis()->shell_to_function(S) + bf4; tim_enter("bzerofast trans_int2"); bzerofast(trans_int2,nvir*ni); tim_exit("bzerofast trans_int2"); tim_enter("2. quart. tr."); for (q = 0; q < nbasis; q++) { iars_ptr = trans_int2; iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni]; c_qa = &scf_vector[q][nocc]; for (a = 0; a < nvir; a++) { for (i=ni; i; i--) { *iars_ptr++ += *c_qa * *iqrs++; } iqrs -= ni; c_qa++; } } // exit q loop tim_exit("2. quart. tr."); // Begin third quarter transformation tim_enter("3. quart. tr."); for (i=0; i<ni; i++) { tmp_index = i*(i+1)/2 + i*i_offset; for (a=0; a<nvir; a++) { iars = trans_int2[a*ni + i]; c_sj = scf_vector[s]; iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)]; for (j=0; j<=i+i_offset; j++) { *iajr_ptr++ += *c_sj++ * iars; } } } // exit i loop tim_exit("3. quart. tr."); } // exit bf4 loop } // exit bf3 loop } // exit S loop r_offset += nr; } // exit R loop tim_exit("RS loop"); // Begin fourth quarter transformation; // first tansform integrals with only s.o. indices; // these integrals are needed to compute the denominators // in the various terms contributing to the correlation energy // and must all be computed in the first pass; // the integrals are summed into the array socc_sum: // socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc) // (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s) // the individual integrals are not saved here, only the sums are kept if (pass == 0) { tim_enter("4. quart. tr."); if (nsocc) bzerofast(socc_sum,nsocc); for (isocc=0; isocc<nsocc; isocc++) { index = 0; for (i=0; i<nRshell; i++) { for (j=0; j<myshellsizes[i]; j++) { r = basis()->shell_to_function(myshells[i]) + j; if (split_info[i] != -1) r += split_info[i]; for (asocc=0; asocc<nsocc; asocc++) { socc_sum[isocc] += scf_vector[r][nocc+asocc]* trans_int3[isocc*(isocc+1)/2 + isocc*i_offset + isocc + dim_ij*(asocc + nvir*index)]; } index++; } } } // exit i loop tim_exit("4. quart. tr."); // Sum socc_sum contributions from each node (only if nsocc > 0 // since gop1 will fail if nsocc = 0) if (nsocc > 0) { tim_enter("global sum socc_sum"); msg_->sum(socc_sum,nsocc,socc_sum_tmp); tim_exit("global sum socc_sum"); } } // Now we have all the sums of integrals involving s.o.'s (socc_sum); // begin fourth quarter transformation for all integrals (including // integrals with only s.o. indices); use restriction j <= (i_offset+i) // to save flops compute_index = 0; for (i=0; i<ni; i++) { for (j=0; j <= (i_offset+i); j++) { tim_enter("4. quart. tr."); bzerofast(trans_int4,nvir*nvir); index = 0; for (k=0; k<nRshell; k++) { for (l=0; l<myshellsizes[k]; l++) { r = basis()->shell_to_function(myshells[k]) + l; if (split_info[k] != -1) r += split_info[k]; for (a=0; a<nvir; a++) { iajb = &trans_int4[a*nvir]; iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)]; c_rb = &scf_vector[r][nocc]; for (b=0; b<nvir; b++) { *iajb++ += *c_rb++ * iajr; } } index++; } } // end of k loop tim_exit("4. quart. tr."); tim_enter("global sum trans_int4"); msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp); tim_exit("global sum trans_int4"); // We now have the fully transformed integrals (ia|jb) // for one i, one j (j <= i_offset+i), and all a and b; // compute contribution to the OPT1 and OPT2 correlation // energies; use restriction b <= a to save flops tim_enter("compute ecorr"); for (a=0; a<nvir; a++) { for (b=0; b<=a; b++) { compute_index++; if (compute_index%nproc != me) continue; docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc) + (j >= nsocc && j < nocc); socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc); vir_index = (a >= nsocc) + (b >= nsocc); if (socc_index >= 3) continue; // skip to next b value
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -