📄 hsosv2lb.cc
字号:
//// hsosv2lb.cc//// Copyright (C) 1996 Limit Point Systems, Inc.//// Author: Ida Nielsen <ida@kemi.aau.dk>// Maintainer: LPS//// This file is part of the SC Toolkit.//// The SC Toolkit is free software; you can redistribute it and/or modify// it under the terms of the GNU Library General Public License as published by// the Free Software Foundation; either version 2, or (at your option)// any later version.//// The SC Toolkit is distributed in the hope that it will be useful,// but WITHOUT ANY WARRANTY; without even the implied warranty of// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the// GNU Library General Public License for more details.//// You should have received a copy of the GNU Library General Public License// along with the SC Toolkit; see the file COPYING.LIB. If not, write to// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.//// The U.S. Government is granted a limited license as per AL 91-7.//#include <math.h>#include <util/misc/timer.h>#include <util/misc/formio.h>#include <chemistry/molecule/molecule.h>#include <chemistry/qc/mbpt/mbpt.h>#include <chemistry/qc/mbpt/bzerofast.h>using namespace std;using namespace sc;static void iqs(int *item,int *index,int left,int right);static void iquicksort(int *item,int *index,int n);static void findprocminmax(int *nbf, int nproc, int *procmin, int *procmax, int *minbf, int *maxbf);static void findshellmax(int *myshellsizes, int nRshell, int *shellmax, int *shellmaxindex);static void expandintarray(int *&a, int dim);voidMBPT2::compute_hsos_v2_lb(){ int i, j, k, l; int s1, s2; int a, b; int isocc, asocc; // indices running over singly occupied orbitals int nfuncmax = basis()->max_nfunction_in_shell(); int nvir; int nshell; int shellmax; int shellmaxindex; int nocc=0,ndocc=0,nsocc=0; int i_offset; int npass, pass; int ni; int np, nq, nr, ns; int P, Q, R, S; int p, q, r, s; int bf1, bf2, bf3, bf4; int bf3_offset; int nbfmoved; int nbfav; // average number of r basis functions per node int minbf, maxbf; // max/min number of (r) basis functions on a node int index; int compute_index; int col_index; int tmp_index; int dim_ij; int docc_index, socc_index, vir_index; int me; int nproc; int procmin, procmax; // processor with most/fewest basis functions int rest; int r_offset; int min; int iproc; int nRshell; int imyshell; int *myshells; // the R indices processed by node me int *myshellsizes; // sizes of the shells (after split) on node me int *split_info; // on each node: offset for each shell; -1 if shell not split int *shellsize; // size of each shell int *sorted_shells; // sorted shell indices: large shells->small shells int *nbf; // number of basis functions processed by each node int *proc; // element k: processor which will process shell k int aoint_computed = 0; double A, B, C, ni_top, max, ni_double; // variables used to compute ni double *evals_open; // reordered scf eigenvalues const double *intbuf; // 2-electron AO integral buffer double *trans_int1; // partially transformed integrals double *trans_int2; // partially transformed integrals double *trans_int3; // partially transformed integrals double *trans_int4; // fully transformed integrals double *trans_int4_tmp; // scratch array double *mo_int_do_so_vir=0;//mo integral (is|sa); i:d.o.,s:s.o.,a:vir double *mo_int_tmp=0; // scratch array used in global summations double *socc_sum=0; // sum of 2-el integrals involving only s.o.'s double *socc_sum_tmp=0;// scratch array double *iqrs, *iprs; double *iars_ptr; double iars; double iajr; double *iajr_ptr; double *iajb; double pqrs; double *c_qa; double *c_rb, *c_pi, *c_qi, *c_sj; double delta_ijab; double delta; double contrib1, contrib2; double ecorr_opt2=0,ecorr_opt1=0; double ecorr_zapt2; double ecorr_opt2_contrib=0, ecorr_zapt2_contrib=0; double escf; double eopt2,eopt1,ezapt2; double tol; // log2 of the erep tolerance (erep < 2^tol => discard) me = msg_->me(); ExEnv::out0() << indent << "Just entered OPT2 program (opt2v2lb)" << endl; tol = (int) (-10.0/log10(2.0)); // discard ereps smaller than 10^-10 nproc = msg_->n(); ExEnv::out0() << indent << "nproc = " << nproc << endl; ndocc = nsocc = 0; const double epsilon = 1.0e-4; for (i=0; i<oso_dimension()->n(); i++) { if (reference_->occupation(i) >= 2.0 - epsilon) ndocc++; else if (reference_->occupation(i) >= 1.0 - epsilon) nsocc++; } // Do a few preliminary tests to make sure the desired calculation // can be done (and appears to be meaningful!) if (ndocc == 0 && nsocc == 0) { ExEnv::err0() << "There are no occupied orbitals; program exiting" << endl; abort(); } if (nfzc > ndocc) { ExEnv::err0() << "The number of frozen core orbitals exceeds the number" << endl << "of doubly occupied orbitals; program exiting" << endl; abort(); } if (nfzv > noso - ndocc - nsocc) { ExEnv::err0() << "The number of frozen virtual orbitals exceeds the number" << endl << "of unoccupied orbitals; program exiting" << endl; abort(); } ndocc = ndocc - nfzc; // nvir = # of unocc. orb. + # of s.o. orb. - # of frozen virt. orb. nvir = noso - ndocc - nfzc - nfzv; // nocc = # of d.o. orb. + # of s.o. orb - # of frozen d.o. orb. nocc = ndocc + nsocc; nshell = basis()->nshell(); // Allocate storage for some arrays used for keeping track of which R // indices are processed by each node shellsize = (int*) malloc(nshell*sizeof(int)); sorted_shells = (int*) malloc(nshell*sizeof(int)); nbf = (int*) malloc(nproc*sizeof(int)); proc = (int*) malloc(nshell*sizeof(int)); /////////////////////////////////////////////////////// // Begin distributing R shells between nodes so all // nodes get ca. the same number of r basis functions /////////////////////////////////////////////////////// // Compute the size of each shell for (i=0; i<nshell; i++) { shellsize[i] = basis()->shell(i).nfunction(); } // Do an index sort (large -> small) of shellsize to form sorted_shells iquicksort(shellsize,sorted_shells,nshell); // Initialize nbf for (i=0; i<nproc; i++) nbf[i] = 0; for (i=0; i<nshell; i++) { min = nbf[0]; iproc = 0; for (j=1; j<nproc; j++) { if (nbf[j] < min) { iproc = j; min = nbf[j]; } } proc[sorted_shells[i]] = iproc; nbf[iproc] += shellsize[sorted_shells[i]]; } if (me == 0) { ExEnv::out0() << indent << "Distribution of basis functions between nodes:" << endl; for (i=0; i<nproc; i++) { if (i%12 == 0) ExEnv::out0() << indent; ExEnv::out0() << scprintf(" %4i",nbf[i]); if ((i+1)%12 == 0) ExEnv::out0() << endl; } ExEnv::out0() << endl; } // Determine which shells are to be processed by node me nRshell = 0; for (i=0; i<nshell; i++) { if (proc[i] == me) nRshell++; } myshells = (int*) malloc(nRshell*sizeof(int)); imyshell = 0; for (i=0; i<nshell; i++) { if (proc[i] == me) { myshells[imyshell] = i; imyshell++; } } ///////////////////////////////////////////////////////////// // End of preliminary distribution of R shells between nodes ///////////////////////////////////////////////////////////// // Compute the average number of basis functions per node nbfav = nbasis/nproc; if (nbasis%nproc) nbfav++; myshellsizes = (int*) malloc(nRshell*sizeof(int)); split_info = (int*) malloc(nRshell*sizeof(int)); for (j=0; j<nRshell; j++) { myshellsizes[j] = basis()->shell(myshells[j]).nfunction(); split_info[j] = -1; } // Find the processor with the most/fewest basis functions findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf); if (maxbf > nbfav) { ExEnv::out0() << indent << "Redistributing basis functions" << endl; } while (maxbf > nbfav) { msg_->sync(); if (me == procmax) { findshellmax(myshellsizes, nRshell, &shellmax, &shellmaxindex); nbfmoved = 0; while (maxbf>nbfav && minbf<nbfav && shellmax>1) { shellmax--; nbfmoved++; maxbf--; minbf++; } myshellsizes[shellmaxindex] = shellmax; if (split_info[shellmaxindex] == -1) split_info[shellmaxindex] = 0; shellmax += nbfmoved; // Send nbfmoved from procmax to all other nodes msg_->bcast(nbfmoved,procmax); // Send variables to node procmin msg_->send(procmin,&myshells[shellmaxindex],1); msg_->send(procmin,&shellmax,1); } else { // Receive nbfmoved from procmax msg_->bcast(nbfmoved,procmax); } nbf[procmax] -= nbfmoved; if (me == procmin) { expandintarray(myshellsizes,nRshell); expandintarray(myshells,nRshell); expandintarray(split_info,nRshell); nRshell++; myshellsizes[nRshell-1] = nbfmoved; msg_->recv(procmax,&myshells[nRshell-1],1); msg_->recv(procmax,&split_info[nRshell-1],1); split_info[nRshell-1] -= myshellsizes[nRshell-1]; } nbf[procmin] += nbfmoved; msg_->sync(); findprocminmax(nbf,nproc,&procmin,&procmax,&minbf,&maxbf); } if (me == 0) { ExEnv::out0() << indent << "New distribution of basis functions between nodes:" << endl; for (i=0; i<nproc; i++) { if (i%12 == 0) ExEnv::out0() << indent; ExEnv::out0() << scprintf(" %4i",nbf[i]); if ((i+1)%12 == 0) ExEnv::out0() << endl; } ExEnv::out0() << endl; } ////////////////////////////////////////////////////////// // End of distribution of R shells and r basis functions ////////////////////////////////////////////////////////// // Compute batch size ni for opt2 loops; // need to store the following arrays of type double : trans_int1-4, // trans_int4_tmp, scf_vector, evals_open, socc_sum, socc_sum_tmp, // mo_int_do_so_vir, mo_int_tmp, // and the following arrays of type int: myshells, shellsize, // sorted_shells, nbf, and proc A = -0.5*sizeof(double)*nbf[me]*nvir; B = sizeof(double)*(nfuncmax*nfuncmax*nbasis + nvir + nocc*nbf[me]*nvir + nbf[me]*nvir*0.5); C = sizeof(double)*(2*nvir*nvir + (nbasis+1)*(nvir+nocc) + 2*nsocc + 2*ndocc*nsocc*(nvir-nsocc)) + sizeof(int)*(3*nshell + nproc + nRshell); ni_top = -B/(2*A); max = A*ni_top*ni_top + B*ni_top +C; if (max <= mem_alloc) { ni = nocc; } else { ni_double = (-B + sqrt((double)(B*B - 4*A*(C-mem_alloc))))/(2*A); ni = (int) ni_double; if (ni > nocc) ni = nocc; max = mem_alloc; } size_t mem_remaining = mem_alloc - (size_t)max; // Set ni equal to the smallest batch size for any node msg_->min(ni); msg_->bcast(ni); if (ni < nsocc) { ExEnv::err0() << "Not enough memory allocated" << endl; abort(); } if (ni < 1) { // this applies only to a closed shell case ExEnv::err0() << "Not enough memory allocated" << endl; abort(); } ExEnv::out0() << indent << "Computed batchsize: " << ni << endl; if (nocc == ni) { npass = 1; rest = 0; } else { rest = nocc%ni; npass = (nocc - rest)/ni + 1; if (rest == 0) npass--; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -