📄 fsdavidson.cpp

📁 An object-oriented C++ implementation of Davidson method for finding a few selected extreme eigenpai
💻 CPP
📖 第 1 页 / 共 5 页
字号:
*          The initial number of intervals.  It may not be greater than
*          MMAX.
*
*  NBMIN   (input) INTEGER
*          The smallest number of intervals that should be processed
*          using a vector loop.  If zero, then only the scalar loop
*          will be used.
*
*  ABSTOL  (input) DOUBLE PRECISION
*          The minimum (absolute) width of an interval.  When an
*          interval is narrower than ABSTOL, or than RELTOL times the
*          larger (in magnitude) endpoint, then it is considered to be
*          sufficiently small, i.e., converged.  This must be at least
*          zero.
*
*  RELTOL  (input) DOUBLE PRECISION
*          The minimum relative width of an interval.  When an interval
*          is narrower than ABSTOL, or than RELTOL times the larger (in
*          magnitude) endpoint, then it is considered to be
*          sufficiently small, i.e., converged.  Note: this should
*          always be at least radix*machine epsilon.
*
*  PIVMIN  (input) DOUBLE PRECISION
*          The minimum absolute value of a "pivot" in the Sturm
*          sequence loop.  This *must* be at least  max |e(j)**2| *
*          safe_min  and at least safe_min, where safe_min is at least
*          the smallest number that can divide one without overflow.
*
*  D       (input) DOUBLE PRECISION array, dimension (N)
*          The diagonal elements of the tridiagonal matrix T.
*
*  E       (input) DOUBLE PRECISION array, dimension (N)
*          The offdiagonal elements of the tridiagonal matrix T in
*          positions 1 through N-1.  E(N) is arbitrary.
*
*  E2      (input) DOUBLE PRECISION array, dimension (N)
*          The squares of the offdiagonal elements of the tridiagonal
*          matrix T.  E2(N) is ignored.
*
*  NVAL    (input/output) INTEGER array, dimension (MINP)
*          If IJOB=1 or 2, not referenced.
*          If IJOB=3, the desired values of N(w).  The elements of NVAL
*          will be reordered to correspond with the intervals in AB.
*          Thus, NVAL(j) on output will not, in general be the same as
*          NVAL(j) on input, but it will correspond with the interval
*          (AB(j,1),AB(j,2)] on output.
*
*  AB      (input/output) DOUBLE PRECISION array, dimension (MMAX,2)
*          The endpoints of the intervals.  AB(j,1) is  a(j), the left
*          endpoint of the j-th interval, and AB(j,2) is b(j), the
*          right endpoint of the j-th interval.  The input intervals
*          will, in general, be modified, split, and reordered by the
*          calculation.
*
*  C       (input/output) DOUBLE PRECISION array, dimension (MMAX)
*          If IJOB=1, ignored.
*          If IJOB=2, workspace.
*          If IJOB=3, then on input C(j) should be initialized to the
*          first search point in the binary search.
*
*  MOUT    (output) INTEGER
*          If IJOB=1, the number of eigenvalues in the intervals.
*          If IJOB=2 or 3, the number of intervals output.
*          If IJOB=3, MOUT will equal MINP.
*
*  NAB     (input/output) INTEGER array, dimension (MMAX,2)
*          If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)).
*          If IJOB=2, then on input, NAB(i,j) should be set.  It must
*             satisfy the condition:
*             N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)),
*             which means that in interval i only eigenvalues
*             NAB(i,1)+1,...,NAB(i,2) will be considered.  Usually,
*             NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with
*             IJOB=1.
*             On output, NAB(i,j) will contain
*             max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of
*             the input interval that the output interval
*             (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the
*             the input values of NAB(k,1) and NAB(k,2).
*          If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)),
*             unless N(w) > NVAL(i) for all search points  w , in which
*             case NAB(i,1) will not be modified, i.e., the output
*             value will be the same as the input value (modulo
*             reorderings -- see NVAL and AB), or unless N(w) < NVAL(i)
*             for all search points  w , in which case NAB(i,2) will
*             not be modified.  Normally, NAB should be set to some
*             distinctive value(s) before DLAEBZ is called.
*
*  WORK    (workspace) DOUBLE PRECISION array, dimension (MMAX)
*          Workspace.
*
*  IWORK   (workspace) INTEGER array, dimension (MMAX)
*          Workspace.
*
*  INFO    (output) INTEGER
*          = 0:       All intervals converged.
*          = 1--MMAX: The last INFO intervals did not converge.
*          = MMAX+1:  More than MMAX intervals were generated.
*
*  Further Details
*  ===============
*
*      This routine is intended to be called only by other LAPACK
*  routines, thus the interface is less user-friendly.  It is intended
*  for two purposes:
*
*  (a) finding eigenvalues.  In this case, DLAEBZ should have one or
*      more initial intervals set up in AB, and DLAEBZ should be called
*      with IJOB=1.  This sets up NAB, and also counts the eigenvalues.
*      Intervals with no eigenvalues would usually be thrown out at
*      this point.  Also, if not all the eigenvalues in an interval i
*      are desired, NAB(i,1) can be increased or NAB(i,2) decreased.
*      For example, set NAB(i,1)=NAB(i,2)-1 to get the largest
*      eigenvalue.  DLAEBZ is then called with IJOB=2 and MMAX
*      no smaller than the value of MOUT returned by the call with
*      IJOB=1.  After this (IJOB=2) call, eigenvalues NAB(i,1)+1
*      through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the
*      tolerance specified by ABSTOL and RELTOL.
*
*  (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l).
*      In this case, start with a Gershgorin interval  (a,b).  Set up
*      AB to contain 2 search intervals, both initially (a,b).  One
*      NVAL element should contain  f-1  and the other should contain  l
*      , while C should contain a and b, resp.  NAB(i,1) should be -1
*      and NAB(i,2) should be N+1, to flag an error if the desired
*      interval does not lie in (a,b).  DLAEBZ is then called with
*      IJOB=3.  On exit, if w(f-1) < w(f), then one of the intervals --
*      j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while
*      if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r
*      >= 0, then the interval will have  N(AB(j,1))=NAB(j,1)=f-k and
*      N(AB(j,2))=NAB(j,2)=f+r.  The cases w(l) < w(l+1) and
*      w(l-r)=...=w(l+k) are handled similarly.
*/
{
    ntyp itmp1, itmp2, kfnew, klnew;
    ftyp tmp1, tmp2;

    //Check for Errors
    *info = 0;
    if (ijob<1 || ijob>3) {
    	*info = -1;
        return;
    }

    if( ijob==1 ) {
    	//Compute the number of eigenvalues in the initial intervals.
    	*mout = 0;
        for (ntyp ji=1; ji<=minp; ji++) {
        	for (ntyp jp=1; jp<=2; jp++) {
            	tmp1 = d[1 -1] - ab(ji,jp);
                if (fabs( tmp1 )<pivmin) tmp1 = -pivmin;
                nab(ji,jp) = 0;
                if (tmp1<=0.0)  nab(ji,jp) = 1;
                for (ntyp j=2; j<=n; j++) {
                	tmp1 = d[j -1] - e2[j-1 -1] / tmp1 -  ab(ji,jp);
                    if (fabs( tmp1 )<pivmin) tmp1 = -pivmin;
                    if (tmp1<=0.0)  nab(ji,jp)++;
                }
            }
            *mout = *mout +  nab(ji,2) - nab(ji,1);
        }
        return;
    }

/*    Initialize for loop
*
*     KF and KL have the following meaning:
*        Intervals 1,...,KF-1 have converged.
*        Intervals KF,...,KL  still need to be refined.    */
    ntyp kf = 1,
    	 kl = minp;

/*    If IJOB=2, initialize C.
*     If IJOB=3, use the user-supplied starting point. */
    if (ijob==2)
    	for (ntyp ji=1; ji<=minp; ji++) c[ji -1] = 0.5*( ab(ji,1)+ab(ji,2) );

    //Iteration loop
    for (ntyp jit=1; jit<=nitmax; jit++) {
    	//Loop over intervals
    	if ( (kl-kf+1)>=nbmin && nbmin>0) {
        	//Begin of Parallel Version of the loop
        	for (ntyp ji=kf; ji<=kl; ji++) {
            	//Compute N(c), the number of eigenvalues less than c
            	work[ji -1] = d[1 -1] - c[ji -1];
                iwork[ji -1] = 0;
               	if( work[ji -1]<=pivmin ) {
                	iwork[ji -1] = 1;
                    work[ji -1] = MIN( work[ji -1], -pivmin );
                }
                for (ntyp j=2; j<=n; j++) {
                    	work[ji -1] = d[j -1] - e2[j-2] / work[ji -1] - c[ji -1];
                        if (work[ji -1]<=pivmin) {
                        	iwork[ji -1] = iwork[ji -1] + 1;
                            work[ji -1] = MIN( work[ji -1], -pivmin );
                        }
                }
            }
            if (ijob<=2) {
            	//IJOB=2: Choose all intervals containing eigenvalues.
            	klnew = kl;
                for (ntyp ji=kf; ji<=kl; ji++){
                	//Insure that N(w) is monotone
                	iwork[ji -1] = MIN( nab(ji,2), (MAX( nab(ji,1), iwork[ji -1] )) );
                    //Update the Queue -- add intervals if both halves
					//contain eigenvalues.
                    if (iwork[ji -1]==nab(ji,2))
                    		//No eigenvalue in the upper interval:
                    		//just use the lower interval.
                    		ab(ji,2)=c[ji -1];
                    	else if (iwork[ji -1]==nab(ji,1))
                        		//No eigenvalue in the lower interval:
                    			//just use the upper interval.
                        		ab(ji,1) = c[ji -1];
                        	else {
                            	klnew = klnew + 1;
                                if (klnew<=mmax) {
                                    //Eigenvalue in both intervals -- add upper to
                       			    //queue.
                                	ab(klnew,2)=ab(ji,2);
                                    nab(klnew,2)=nab(ji,2);
                                    ab(klnew,1)=c[ji -1];
                                    nab(klnew,1)=iwork[ji -1];
                                    ab(ji,2)=c[ji -1];
                                    nab(ji,2)=iwork[ji -1];
                                } else *info = mmax + 1;
                            }
                }
                if (*info!=0) return;
               	kl = klnew;
            } else {
                //IJOB=3: Binary search.  Keep only the interval containing
                //w   s.t. N(w) = NVAL
            	for (ntyp ji=kf; ji<=kl; ji++) {
                	if (iwork[ji -1]<=nval[ji -1]) {
                    	ab(ji,1) = c[ji -1];
                        nab(ji,1) = iwork[ji -1];
                    }
                    if (iwork[ji -1]>=nval[ji -1]) {
                    	ab(ji,2) = c[ji -1];
                        nab(ji,2) = iwork[ji -1];
                    }
                }
            }
        } else {
            //End of Parallel Version of the loop
            //Begin of Serial Version of the loop
        	klnew = kl;
            for (ntyp ji=kf; ji<=kl; ji++) {
            	tmp1 = c[ji -1];
               	tmp2 = d[1 -1] - tmp1;
                itmp1 = 0;
                if (tmp2<=pivmin) {
                	itmp1 = 1;
                    tmp2 = MIN( tmp2, (-pivmin) );
                }
                //A series of compiler directives to defeat vectorization
                //for the next loop
                for (ntyp j=2; j<=n; j++) {
                	tmp2 = d[j -1] - e2[j-2] / tmp2 - tmp1;
                    if (tmp2<=pivmin) {
                    	++itmp1;
                        tmp2 = MIN( tmp2, (-pivmin) );
                    }
                }
                if (ijob<=2) {
                    //IJOB=2: Choose all intervals containing eigenvalues.
                    //Insure that N(w) is monotone
                	itmp1 = MIN( nab(ji,2),MAX( nab(ji,1), itmp1 ) );
                    //Update the Queue -- add intervals if both halves
                    //contain eigenvalues.
                    if (itmp1==nab(ji,2))
                    		//No eigenvalue in the upper interval:
                    		//just use the lower interval.
                    		ab(ji,2) = tmp1;
                    	else if (itmp1==nab(ji,1))
                                //No eigenvalue in the lower interval:
                    			//just use the upper interval.
                        		ab(ji,1) = tmp1;
                        	else if (klnew<mmax) {
                            	//Eigenvalue in both intervals -- add upper to queue.
                            	klnew++;
                                ab(klnew,2)=ab(ji,2);
                                nab(klnew,2)=nab(ji,2);
                                ab(klnew,1)=tmp1;
                                nab(klnew,1)=itmp1;
                                ab(ji,2)=tmp1;
                                nab(ji,2)=itmp1;
                            } else {
                            	*info = mmax + 1;
                                return;
                            }
                } else {
                	//IJOB=3: Binary search.  Keep only the interval
                    //containing  w  s.t. N(w) = NVAL
                	if (itmp1<=nval[ji -1]) {
                    	ab(ji,1) = tmp1;
                        nab(ji,1) = itmp1;
                    }
                    if( itmp1>=nval[ji -1] ) {
                    	ab(ji,2) = tmp1;
                        nab(ji,2) = itmp1;
                    }
                }
            }
            kl = klnew;
            //End of Serial Version of the loop
        }
        //Check for convergence
        kfnew = kf;
        for (ntyp ji=kf; ji<=kl; ji++) {
        	tmp1 = fabs( ab(ji,2)-ab(ji,1) );
            tmp2 = MAX( fabs( ab(ji,2) ), fabs( ab(ji,1) ) );
            if (tmp1<(MAX( (MAX(abstol, pivmin)), reltol*tmp2 )) || nab(ji,1)>=nab(ji,2) ) {
                //Converged -- Swap with position KFNEW,
                //then increment KFNEW
            	if (ji>kfnew) {
                	tmp1 = ab(ji,1);
                  	tmp2 = ab(ji,2);
                    itmp1 = nab(ji,1);
                  	itmp2 = nab(ji,2);
                    ab(ji,1) = ab(kfnew,1);
                  	ab(ji,2) = ab(kfnew,2);
                  	nab(ji,1) = nab(kfnew,1);
                  	nab(ji,2) = nab(kfnew,2);
                  	ab(kfnew,1) = tmp1;
                  	ab(kfnew,2) = tmp2;
                  	nab(kfnew,1) = itmp1;
                  	nab(kfnew,2) = itmp2;
                    if (ijob==3){
                    	itmp1 = nval[ji -1];
                    	nval[ji -1] = nval[kfnew -1];
                    	nval[kfnew -1] = itmp1;
                    }
                }
                kfnew++;
            }
        }
        kf = kfnew;
        //Choose Midpoints
        for (ntyp ji=kf; ji<=kl; ji++) c[ji -1]=0.5*( ab(ji,1)+ab(ji,2) );
        //If no more intervals to refine, quit.
        if (kf>kl) {
        	*info = MAX( kl+1-kf, 0 );
            *mout = kl;
            return;
        }
    }
    //Converged
    *info = MAX( kl+1-kf, 0 );
    *mout = kl;

	return;
}

#undef ab  //(j,i)
#undef nab //(j,i)
//---------------------------------------------------------------------------
void fsDavidson::dlaev2(ftyp a, ftyp b, ftyp c, ftyp *rt1, ftyp *rt2,
 			ftyp *cs1, ftyp *sn1)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -