slasd4.c

来自「提供矩阵类的函数库」· C语言代码 · 共 1,088 行 · 第 1/2 页
1,088 行
#include "blaswrap.h"
/*  -- translated by f2c (version 19990503).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2c.h"

/* Common Block Declarations */

/*
 * Storage for the common block exposed to C as `latime_`.
 *
 *   ops   - running operation counter; the instrumented routine in this
 *           file adds to it ahead of each group of arithmetic statements.
 *   itcnt - second member of the block; presumably an iteration counter,
 *           but it is not referenced in the visible part of this file
 *           (TODO confirm against the timing driver).
 */
struct {
    real ops;
    real itcnt;
} latime_;

/* f2c-style alias through which the members above are accessed. */
#define latime_1 latime_

/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__, 
	real *delta, real *rho, real *sigma, real *work, integer *info)
{
    /* System generated locals */
    integer i__1;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static real dphi, dpsi;
    static integer iter;
    static real temp, prew, sg2lb, sg2ub, temp1, temp2, a, b, c__;
    static integer j;
    static real w, dtiim, delsq, dtiip;
    static integer niter;
    static real dtisq;
    static logical swtch;
    static real dtnsq;
    extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *, 
	    real *, real *, real *, integer *);
    static real delsq2;
    extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *, 
	    real *, real *, real *);
    static real dd[3], dtnsq1;
    static logical swtch3;
    static integer ii;
    static real dw;
    extern doublereal slamch_(char *);
    static real zz[3];
    static logical orgati;
    static real erretm, dtipsq, rhoinv;
    static integer ip1;
    static real eta, phi, eps, tau, psi;
    static integer iim1, iip1;


/*  -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) --   
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,   
       Courant Institute, NAG Ltd., and Rice University   
       October 31, 1999   


    Purpose   
    =======   

    This subroutine computes the square root of the I-th updated   
    eigenvalue of a positive symmetric rank-one modification to   
    a positive diagonal matrix whose entries are given as the squares   
    of the corresponding entries in the array d, and that   

           0 <= D(i) < D(j)  for  i < j   

    and that RHO > 0. This is arranged by the calling routine, and is   
    no loss in generality.  The rank-one modified system is thus   

           diag( D ) * diag( D ) +  RHO *  Z * Z_transpose.   

    where we assume the Euclidean norm of Z is 1.   

    The method consists of approximating the rational functions in the   
    secular equation by simpler interpolating rational functions.   

    Arguments   
    =========   

    N      (input) INTEGER   
           The length of all arrays.   

    I      (input) INTEGER   
           The index of the eigenvalue to be computed.  1 <= I <= N.   

    D      (input) REAL array, dimension ( N )   
           The original eigenvalues.  It is assumed that they are in   
           order, 0 <= D(I) < D(J)  for I < J.   

    Z      (input) REAL array, dimension ( N )   
           The components of the updating vector.   

    DELTA  (output) REAL array, dimension ( N )   
           If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th   
           component.  If N = 1, then DELTA(1) = 1.  The vector DELTA   
           contains the information necessary to construct the   
           (singular) eigenvectors.   

    RHO    (input) REAL   
           The scalar in the symmetric updating formula.   

    SIGMA  (output) REAL   
           The computed sigma_I, i.e. the square root of the I-th   
           updated eigenvalue.   

    WORK   (workspace) REAL array, dimension ( N )   
           If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th   
           component.  If N = 1, then WORK( 1 ) = 1.   

    INFO   (output) INTEGER   
           = 0:  successful exit   
           > 0:  if INFO = 1, the updating process failed.   

    Internal Parameters   
    ===================   

    Logical variable ORGATI (origin-at-i?) is used for distinguishing   
    whether D(i) or D(i+1) is treated as the origin.   

              ORGATI = .true.    origin at i   
              ORGATI = .false.   origin at i+1   

    Logical variable SWTCH3 (switch-for-3-poles?) is for noting   
    if we are working with THREE poles!   

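    MAXIT is the maximum number of iterations allowed for each   
    eigenvalue.  In this translated C code it is not a named constant;   
    it appears as the literal 20 in the iteration loop bound.   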

    Further Details   
    ===============   

    Based on contributions by   
       Ren-Cang Li, Computer Science Division, University of California   
       at Berkeley, USA   

    =====================================================================   


       Since this routine is called in an inner loop, we do no argument   
       checking.   

       Quick return for N=1 and 2.   

       Parameter adjustments */
    --work;
    --delta;
    --z__;
    --d__;

    /* Function Body */
    *info = 0;
    if (*n == 1) {

/*        Presumably, I=1 upon entry */

	latime_1.ops += 5.f;
	*sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]);
	delta[1] = 1.f;
	work[1] = 1.f;
	return 0;
    }
    if (*n == 2) {
	slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]);
	return 0;
    }

/*     Compute machine epsilon */

    eps = slamch_("Epsilon");
    latime_1.ops += 1.f;
    rhoinv = 1.f / *rho;

/*     The case I = N */

    if (*i__ == *n) {

/*        Initialize some basic variables */

	ii = *n - 1;
	niter = 1;

/*        Calculate initial guess */

	latime_1.ops += 1.f;
	temp = *rho / 2.f;

/*        If ||Z||_2 is not one, then TEMP should be set to   
          RHO * ||Z||_2^2 / TWO */

	latime_1.ops += (real) ((*n << 2) + 5);
	temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp));
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    work[j] = d__[j] + d__[*n] + temp1;
	    delta[j] = d__[j] - d__[*n] - temp1;
/* L10: */
	}

	psi = 0.f;
	latime_1.ops += (real) (*n - 2 << 2);
	i__1 = *n - 2;
	for (j = 1; j <= i__1; ++j) {
	    psi += z__[j] * z__[j] / (delta[j] * work[j]);
/* L20: */
	}

	latime_1.ops += 9.f;
	c__ = rhoinv + psi;
	w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[*
		n] / (delta[*n] * work[*n]);

	if (w <= 0.f) {
	    latime_1.ops += 14.f;
	    temp1 = sqrt(d__[*n] * d__[*n] + *rho);
	    temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[*
		    n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] * 
		    z__[*n] / *rho;

/*           The following TAU is to approximate   
             SIGMA_n^2 - D( N )*D( N ) */

	    if (c__ <= temp) {
		tau = *rho;
	    } else {
		latime_1.ops += 10.f;
		delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
		a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*
			n];
		b = z__[*n] * z__[*n] * delsq;
		if (a < 0.f) {
		    latime_1.ops += 8.f;
		    tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
		} else {
		    latime_1.ops += 8.f;
		    tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
		}
	    }

/*           It can be proved that   
                 D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO */

	} else {
	    latime_1.ops += 10.f;
	    delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
	    a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
	    b = z__[*n] * z__[*n] * delsq;

/*           The following TAU is to approximate   
             SIGMA_n^2 - D( N )*D( N ) */

	    if (a < 0.f) {
		latime_1.ops += 8.f;
		tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
	    } else {
		latime_1.ops += 8.f;
		tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
	    }

/*           It can be proved that   
             D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2 */

	}

/*        The following ETA is to approximate SIGMA_n - D( N ) */

	latime_1.ops += 5.f;
	eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau));

	latime_1.ops += (real) ((*n << 2) + 1);
	*sigma = d__[*n] + eta;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    delta[j] = d__[j] - d__[*i__] - eta;
	    work[j] = d__[j] + d__[*i__] + eta;
/* L30: */
	}

/*        Evaluate PSI and the derivative DPSI */

	dpsi = 0.f;
	psi = 0.f;
	erretm = 0.f;
	latime_1.ops += (real) (ii * 7);
	i__1 = ii;
	for (j = 1; j <= i__1; ++j) {
	    temp = z__[j] / (delta[j] * work[j]);
	    psi += z__[j] * temp;
	    dpsi += temp * temp;
	    erretm += psi;
/* L40: */
	}
	erretm = dabs(erretm);

/*        Evaluate PHI and the derivative DPHI */

	latime_1.ops += 14.f;
	temp = z__[*n] / (delta[*n] * work[*n]);
	phi = z__[*n] * temp;
	dphi = temp * temp;
	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
		dpsi + dphi);

	w = rhoinv + phi + psi;

/*        Test for convergence */

	if (dabs(w) <= eps * erretm) {
	    goto L240;
	}

/*        Calculate the new step */

	++niter;
	latime_1.ops += 14.f;
	dtnsq1 = work[*n - 1] * delta[*n - 1];
	dtnsq = work[*n] * delta[*n];
	c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
	a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi);
	b = dtnsq * dtnsq1 * w;
	if (c__ < 0.f) {
	    c__ = dabs(c__);
	}
	if (c__ == 0.f) {
	    latime_1.ops += 2.f;
	    eta = *rho - *sigma * *sigma;
	} else if (a >= 0.f) {
	    latime_1.ops += 8.f;
	    eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
		    c__ * 2.f);
	} else {
	    latime_1.ops += 8.f;
	    eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
		    r__1))));
	}

/*        Note, eta should be positive if w is negative, and   
          eta should be negative otherwise. However,   
          if for some reason caused by roundoff, eta*w > 0,   
          we simply use one Newton step instead. This way   
          will guarantee eta*w < 0. */

	latime_1.ops += 1.f;
	if (w * eta > 0.f) {
	    latime_1.ops += 2.f;
	    eta = -w / (dpsi + dphi);
	}
	temp = eta - dtnsq;
	if (temp > *rho) {
	    latime_1.ops += 1.f;
	    eta = *rho + dtnsq;
	}

	latime_1.ops += (real) ((*n << 1) + 7);
	tau += eta;
	eta /= *sigma + sqrt(eta + *sigma * *sigma);
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    delta[j] -= eta;
	    work[j] += eta;
/* L50: */
	}

	*sigma += eta;

/*        Evaluate PSI and the derivative DPSI */

	dpsi = 0.f;
	psi = 0.f;
	erretm = 0.f;
	latime_1.ops += (real) (ii * 7);
	i__1 = ii;
	for (j = 1; j <= i__1; ++j) {
	    temp = z__[j] / (work[j] * delta[j]);
	    psi += z__[j] * temp;
	    dpsi += temp * temp;
	    erretm += psi;
/* L60: */
	}
	erretm = dabs(erretm);

/*        Evaluate PHI and the derivative DPHI */

	latime_1.ops += 14.f;
	temp = z__[*n] / (work[*n] * delta[*n]);
	phi = z__[*n] * temp;
	dphi = temp * temp;
	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
		dpsi + dphi);

	w = rhoinv + phi + psi;

/*        Main loop to update the values of the array   DELTA */

	iter = niter + 1;

	for (niter = iter; niter <= 20; ++niter) {

/*           Test for convergence */

	    latime_1.ops += 1.f;
	    if (dabs(w) <= eps * erretm) {
		goto L240;
	    }

/*           Calculate the new step */

	    latime_1.ops += 22.f;
	    dtnsq1 = work[*n - 1] * delta[*n - 1];
	    dtnsq = work[*n] * delta[*n];
	    c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
	    a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi);
	    b = dtnsq1 * dtnsq * w;
	    if (a >= 0.f) {
		eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
			 (c__ * 2.f);
	    } else {
		eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
			r__1))));
	    }

/*           Note, eta should be positive if w is negative, and   
             eta should be negative otherwise. However,   
             if for some reason caused by roundoff, eta*w > 0,   
             we simply use one Newton step instead. This way   
             will guarantee eta*w < 0. */

	    latime_1.ops += 2.f;
	    if (w * eta > 0.f) {
		latime_1.ops += 2.f;
		eta = -w / (dpsi + dphi);
	    }
	    temp = eta - dtnsq;
	    if (temp <= 0.f) {
		latime_1.ops += 1.f;
		eta /= 2.f;
	    }

	    latime_1.ops += (real) ((*n << 1) + 7);
	    tau += eta;
	    eta /= *sigma + sqrt(eta + *sigma * *sigma);
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		delta[j] -= eta;
		work[j] += eta;
/* L70: */
	    }

	    *sigma += eta;

/*           Evaluate PSI and the derivative DPSI */

	    dpsi = 0.f;
	    psi = 0.f;
	    erretm = 0.f;
	    latime_1.ops += (real) (ii * 7);
	    i__1 = ii;
	    for (j = 1; j <= i__1; ++j) {
		temp = z__[j] / (work[j] * delta[j]);
		psi += z__[j] * temp;
		dpsi += temp * temp;
		erretm += psi;
/* L80: */
	    }
	    erretm = dabs(erretm);

/*           Evaluate PHI and the derivative DPHI */

	    latime_1.ops += 14.f;
	    temp = z__[*n] / (work[*n] * delta[*n]);
	    phi = z__[*n] * temp;
	    dphi = temp * temp;
	    erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * 
		    (dpsi + dphi);

	    w = rhoinv + phi + psi;
/* L90: */
	}

/*        Return with INFO = 1, NITER = MAXIT and not converged */

	*info = 1;
	goto L240;

/*        End for the case I = N */

    } else {

/*        The case for I < N */

	niter = 1;
	ip1 = *i__ + 1;

/*        Calculate initial guess */

	latime_1.ops += (real) ((*n << 2) + 9);
	delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]);
	delsq2 = delsq / 2.f;
	temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2));
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    work[j] = d__[j] + d__[*i__] + temp;
	    delta[j] = d__[j] - d__[*i__] - temp;
/* L100: */
	}

	psi = 0.f;
	latime_1.ops += (real) (*i__ - 1 << 2);
	i__1 = *i__ - 1;
	for (j = 1; j <= i__1; ++j) {
	    psi += z__[j] * z__[j] / (work[j] * delta[j]);
/* L110: */
	}

	phi = 0.f;
	latime_1.ops += (real) ((*n - *i__ - 1 << 2) + 10);
	i__1 = *i__ + 2;
	for (j = *n; j >= i__1; --j) {
	    phi += z__[j] * z__[j] / (work[j] * delta[j]);
/* L120: */
	}
	c__ = rhoinv + psi + phi;
	w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[
		ip1] * z__[ip1] / (work[ip1] * delta[ip1]);

	if (w > 0.f) {

/*           d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2   

             We choose d(i) as origin. */

	    latime_1.ops += 20.f;
	    orgati = TRUE_;
	    sg2lb = 0.f;
	    sg2ub = delsq2;
	    a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
	    b = z__[*i__] * z__[*i__] * delsq;
	    if (a > 0.f) {
		tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
			r__1))));
	    } else {
		tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
slasd4.c - 源码说明

本页面展示了「提供矩阵类的函数库」中的 slasd4.c 源码文件，采用 C语言编程语言编写，共 1,088 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与矩阵函数库相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?