📄 hsosv2lb.cc

📁 大型并行量子化学软件；支持密度泛函（DFT）。可以进行各种量子化学计算。支持CHARMM并行计算。非常具有应用价值。
💻 CC
📖 第 1 页 / 共 3 页
字号:
  if (me == 0) {    ExEnv::out0() << indent << " npass  rest  nbasis  nshell  nfuncmax"                     "  ndocc  nsocc  nvir  nfzc  nfzv" << endl;    ExEnv::out0() << indent         << scprintf("  %-4i   %-3i   %-5i    %-4i     %-3i"                     "     %-3i    %-3i    %-3i    %-3i   %-3i\n",             npass,rest,nbasis,nshell,nfuncmax,ndocc,nsocc,nvir,nfzc,nfzv);    ExEnv::out0() << indent         << scprintf("Using %i bytes of memory",mem_alloc) << endl;    }  //////////////////////  // Test that ni is OK  //////////////////////  if (me == 0) {    ExEnv::out0() << indent         << scprintf("Memory allocated: %i", mem_alloc) << endl;    ExEnv::out0() << indent         << scprintf("Memory used     : %lf", A*ni*ni+B*ni+C) << endl;    if (A*ni*ni + B*ni +C > mem_alloc) {      ExEnv::err0() << "Problems with memory allocation: "           << "Using more memory than allocated" << endl;      abort();      }    }  //////////////////////////////////////////////////////////////////  // The scf vector might be distributed between the nodes,  // but for OPT2 each node needs its own copy of the vector;  // therefore, put a copy of the scf vector on each node;  // while doing this, duplicate columns corresponding to singly  // occupied orbitals and order columns as [socc docc socc unocc]  // Also rearrange scf eigenvalues as [socc docc socc unocc]  // want socc first to get the socc's in the first batch  // (need socc's to compute energy denominators - see  // socc_sum comment below)  /////////////////////////////////////////////////////////  evals_open = (double*) malloc((noso+nsocc-nfzc-nfzv)*sizeof(double));  RefDiagSCMatrix occ;  RefDiagSCMatrix evals;  RefSCMatrix Scf_Vec;  eigen(evals, Scf_Vec, occ);  if (debug_) {    evals.print("eigenvalues");    Scf_Vec.print("eigenvectors");    }  double *scf_vectort_dat = new double[nbasis*noso];  Scf_Vec->convert(scf_vectort_dat);  double** scf_vectort = new double*[nocc + nvir];  int idoc = 0, ivir = 0, isoc = 0;  for (i=nfzc; i<noso-nfzv; i++) {    if (occ(i) >= 2.0 - epsilon) {      evals_open[idoc+nsocc] = evals(i);      scf_vectort[idoc+nsocc] = &scf_vectort_dat[i*nbasis];      idoc++;      }    else if (occ(i) >= 1.0 - epsilon) {      evals_open[isoc] = evals(i);      scf_vectort[isoc] = &scf_vectort_dat[i*nbasis];      evals_open[isoc+nocc] = evals(i);      scf_vectort[isoc+nocc] = &scf_vectort_dat[i*nbasis];      isoc++;      }    else {      if (ivir < nvir) {        evals_open[ivir+nocc+nsocc] = evals(i);        scf_vectort[ivir+nocc+nsocc] = &scf_vectort_dat[i*nbasis];        }      ivir++;      }    }  // need the transpose of the vector  double **scf_vector = new double*[nbasis];  double *scf_vector_dat = new double[(nocc+nvir)*nbasis];  for (i=0; i<nbasis; i++) {    scf_vector[i] = &scf_vector_dat[(nocc+nvir)*i];    for (j=0; j<nocc+nvir; j++) {      scf_vector[i][j] = scf_vectort[j][i];      }    }  delete[] scf_vectort;  delete[] scf_vectort_dat;  ////////////////////////////////////////  // Allocate storage for various arrays  ////////////////////////////////////////  dim_ij = nocc*ni - ni*(ni - 1)/2;  trans_int1 = (double*) malloc(nfuncmax*nfuncmax*nbasis*ni*sizeof(double));  trans_int2 = (double*) malloc(nvir*ni*sizeof(double));  trans_int3 = (double*) malloc(nbf[me]*nvir*dim_ij*sizeof(double));  trans_int4 = (double*) malloc(nvir*nvir*sizeof(double));  trans_int4_tmp = (double*) malloc(nvir*nvir*sizeof(double));  if (nsocc) socc_sum = (double*) malloc(nsocc*sizeof(double));  if (nsocc) socc_sum_tmp = (double*) malloc(nsocc*sizeof(double));  if (nsocc) mo_int_do_so_vir =                      (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));  if (nsocc) mo_int_tmp =                      (double*) malloc(ndocc*nsocc*(nvir-nsocc)*sizeof(double));  if (nsocc) bzerofast(mo_int_do_so_vir,ndocc*nsocc*(nvir-nsocc));  // create the integrals object  integral()->set_storage(mem_remaining);  tbint_ = integral()->electron_repulsion();  intbuf = tbint_->buffer(); ///////////////////////////////////// //  Begin opt2 loops /////////////////////////////////////  for (pass=0; pass<npass; pass++) {    i_offset = pass*ni;      if ((pass == npass - 1) && (rest != 0)) ni = rest;    r_offset = 0;    bzerofast(trans_int3,nbf[me]*nvir*dim_ij);    tim_enter("RS loop");    for (imyshell=0; imyshell<nRshell; imyshell++) {      R = myshells[imyshell];      nr = myshellsizes[imyshell];      for (S = 0; S < nshell; S++) {        ns = basis()->shell(S).nfunction();        tim_enter("bzerofast trans_int1");        bzerofast(trans_int1,nfuncmax*nfuncmax*nbasis*ni);        tim_exit("bzerofast trans_int1");        tim_enter("PQ loop");        for (P = 0; P < nshell; P++) {          np = basis()->shell(P).nfunction();          for (Q = 0; Q <= P; Q++) {            if (tbint_->log2_shell_bound(P,Q,R,S) < tol) {              continue;                          // skip ereps less than tol              }            aoint_computed++;            nq = basis()->shell(Q).nfunction();            tim_enter("erep");            tbint_->compute_shell(P,Q,R,S);            tim_exit("erep");            tim_enter("1. quart. tr.");            for (bf1 = 0; bf1 < np; bf1++) {              p = basis()->shell_to_function(P) + bf1;               for (bf2 = 0; bf2 < nq; bf2++) {                q = basis()->shell_to_function(Q) + bf2;                if (q > p) {                  // if q > p: want to skip the loops over bf3-4                  // and larger bf2 values, so increment bf1 by 1                  // ("break")                  break;                  }                for (bf3 = 0; bf3 < nr; bf3++) {                  bf3_offset = 0;                  if (split_info[imyshell] != -1) bf3_offset = split_info[imyshell];                  for (bf4 = 0; bf4 < ns; bf4++) {                    index = bf4 + ns*(bf3+bf3_offset +                                basis()->shell(R).nfunction()*(bf2 + nq*bf1));                    if (fabs(intbuf[index]) > 1.0e-15) {                      pqrs = intbuf[index];                      iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];                      iprs = &trans_int1[((bf4*nr + bf3)*nbasis + p)*ni];                                          if (p == q) pqrs *= 0.5;                      col_index = i_offset;                      c_pi = &scf_vector[p][col_index];                      c_qi = &scf_vector[q][col_index];                      for (i=ni; i; i--) {                        *iqrs++ += pqrs * *c_pi++;                        *iprs++ += pqrs * *c_qi++;                        }                      }                    }   // exit bf4 loop                  }     // exit bf3 loop                }       // exit bf2 loop              }         // exit bf1 loop            tim_exit("1. quart. tr.");            }           // exit Q loop          }             // exit P loop        tim_exit("PQ loop");        // Begin second and third quarter transformations        for (bf3 = 0; bf3 < nr; bf3++) {          r = r_offset + bf3;          for (bf4 = 0; bf4 < ns; bf4++) {            s = basis()->shell_to_function(S) + bf4;            tim_enter("bzerofast trans_int2");            bzerofast(trans_int2,nvir*ni);            tim_exit("bzerofast trans_int2");            tim_enter("2. quart. tr.");            for (q = 0; q < nbasis; q++) {              iars_ptr = trans_int2;              iqrs = &trans_int1[((bf4*nr + bf3)*nbasis + q)*ni];              c_qa = &scf_vector[q][nocc];              for (a = 0; a < nvir; a++) {                for (i=ni; i; i--) {                  *iars_ptr++ += *c_qa * *iqrs++;                  }                iqrs -= ni;                c_qa++;                }              }             // exit q loop            tim_exit("2. quart. tr.");            // Begin third quarter transformation            tim_enter("3. quart. tr.");            for (i=0; i<ni; i++) {              tmp_index = i*(i+1)/2 + i*i_offset;              for (a=0; a<nvir; a++) {                iars = trans_int2[a*ni + i];                c_sj = scf_vector[s];                iajr_ptr = &trans_int3[tmp_index + dim_ij*(a + nvir*r)];                for (j=0; j<=i+i_offset; j++) {                  *iajr_ptr++ += *c_sj++ * iars;                  }                }              }   // exit i loop            tim_exit("3. quart. tr.");            } // exit bf4 loop          }   // exit bf3 loop        }         // exit S loop      r_offset += nr;      }           // exit R loop    tim_exit("RS loop");    // Begin fourth quarter transformation;    // first tansform integrals with only s.o. indices;    // these integrals are needed to compute the denominators    // in the various terms contributing to the correlation energy    // and must all be computed in the first pass;    // the integrals are summed into the array socc_sum:    // socc_sum[isocc] = sum over asocc of (isocc asocc|asocc isocc)    // (isocc, asocc = s.o. and the sum over asocc runs over all s.o.'s)    // the individual integrals are not saved here, only the sums are kept    if (pass == 0) {      tim_enter("4. quart. tr.");      if (nsocc) bzerofast(socc_sum,nsocc);      for (isocc=0; isocc<nsocc; isocc++) {        index = 0;        for (i=0; i<nRshell; i++) {          for (j=0; j<myshellsizes[i]; j++) {            r = basis()->shell_to_function(myshells[i]) + j;            if (split_info[i] != -1) r += split_info[i];            for (asocc=0; asocc<nsocc; asocc++) {              socc_sum[isocc] += scf_vector[r][nocc+asocc]*                             trans_int3[isocc*(isocc+1)/2 + isocc*i_offset                                       + isocc + dim_ij*(asocc + nvir*index)];              }            index++;            }          }        }       // exit i loop      tim_exit("4. quart. tr.");      // Sum socc_sum contributions from each node (only if nsocc > 0      // since gop1 will fail if nsocc = 0)      if (nsocc > 0) {        tim_enter("global sum socc_sum");        msg_->sum(socc_sum,nsocc,socc_sum_tmp);        tim_exit("global sum socc_sum");        }      }     // Now we have all the sums of integrals involving s.o.'s (socc_sum);    // begin fourth quarter transformation for all integrals (including    // integrals with only s.o. indices); use restriction j <= (i_offset+i)    // to save flops    compute_index = 0;    for (i=0; i<ni; i++) {      for (j=0; j <= (i_offset+i); j++) {        tim_enter("4. quart. tr.");        bzerofast(trans_int4,nvir*nvir);        index = 0;        for (k=0; k<nRshell; k++) {          for (l=0; l<myshellsizes[k]; l++) {            r = basis()->shell_to_function(myshells[k]) + l;            if (split_info[k] != -1) r += split_info[k];            for (a=0; a<nvir; a++) {              iajb = &trans_int4[a*nvir];              iajr = trans_int3[i*(i+1)/2 + i*i_offset + j + dim_ij*(a+nvir*index)];              c_rb = &scf_vector[r][nocc];              for (b=0; b<nvir; b++) {                *iajb++ += *c_rb++ * iajr;                }              }            index++;            }          }   // end of k loop        tim_exit("4. quart. tr.");        tim_enter("global sum trans_int4");        msg_->sum(trans_int4,nvir*nvir,trans_int4_tmp);        tim_exit("global sum trans_int4");        // We now have the fully transformed integrals (ia|jb)        // for one i, one j (j <= i_offset+i), and all a and b;        // compute contribution to the OPT1 and OPT2 correlation        // energies; use restriction b <= a to save flops        tim_enter("compute ecorr");        for (a=0; a<nvir; a++) {          for (b=0; b<=a; b++) {            compute_index++;            if (compute_index%nproc != me) continue;            docc_index = ((i_offset+i) >= nsocc && (i_offset+i) < nocc)                         + (j >= nsocc && j < nocc);            socc_index = ((i_offset+i)<nsocc)+(j<nsocc)+(a<nsocc)+(b<nsocc);            vir_index = (a >= nsocc) + (b >= nsocc);            if (socc_index >= 3) continue; // skip to next b value
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -