📄 stgevc.c
字号:
if (compl) {
ieig = 0;
/* Main loop over eigenvalues */
ilcplx = FALSE_;
i__1 = *n;
for (je = 1; je <= i__1; ++je) {
/* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
(b) this would be the second of a complex pair.
Check for complex eigenvalue, so as to be sure of which
entry(-ies) of SELECT to look at. */
if (ilcplx) {
ilcplx = FALSE_;
goto L220;
}
nw = 1;
if (je < *n) {
if (a_ref(je + 1, je) != 0.f) {
ilcplx = TRUE_;
nw = 2;
}
}
if (ilall) {
ilcomp = TRUE_;
} else if (ilcplx) {
ilcomp = select[je] || select[je + 1];
} else {
ilcomp = select[je];
}
if (! ilcomp) {
goto L220;
}
/* Decide if (a) singular pencil, (b) real eigenvalue, or
(c) complex eigenvalue. */
if (! ilcplx) {
if ((r__1 = a_ref(je, je), dabs(r__1)) <= safmin && (r__2 =
b_ref(je, je), dabs(r__2)) <= safmin) {
/* Singular matrix pencil -- returns unit eigenvector */
++ieig;
i__2 = *n;
for (jr = 1; jr <= i__2; ++jr) {
vl_ref(jr, ieig) = 0.f;
/* L60: */
}
vl_ref(ieig, ieig) = 1.f;
goto L220;
}
}
/* Clear vector */
i__2 = nw * *n;
for (jr = 1; jr <= i__2; ++jr) {
work[(*n << 1) + jr] = 0.f;
/* L70: */
}
/* T
Compute coefficients in ( a A - b B ) y = 0
a is ACOEF
b is BCOEFR + i*BCOEFI */
if (! ilcplx) {
/* Real eigenvalue
Computing MAX */
r__3 = (r__1 = a_ref(je, je), dabs(r__1)) * ascale, r__4 = (
r__2 = b_ref(je, je), dabs(r__2)) * bscale, r__3 =
max(r__3,r__4);
temp = 1.f / dmax(r__3,safmin);
salfar = temp * a_ref(je, je) * ascale;
sbeta = temp * b_ref(je, je) * bscale;
acoef = sbeta * ascale;
bcoefr = salfar * bscale;
bcoefi = 0.f;
/* Scale to avoid underflow */
scale = 1.f;
lsa = dabs(sbeta) >= safmin && dabs(acoef) < small;
lsb = dabs(salfar) >= safmin && dabs(bcoefr) < small;
if (lsa) {
scale = small / dabs(sbeta) * dmin(anorm,big);
}
if (lsb) {
/* Computing MAX */
r__1 = scale, r__2 = small / dabs(salfar) * dmin(bnorm,
big);
scale = dmax(r__1,r__2);
}
if (lsa || lsb) {
/* Computing MIN
Computing MAX */
r__3 = 1.f, r__4 = dabs(acoef), r__3 = max(r__3,r__4),
r__4 = dabs(bcoefr);
r__1 = scale, r__2 = 1.f / (safmin * dmax(r__3,r__4));
scale = dmin(r__1,r__2);
if (lsa) {
acoef = ascale * (scale * sbeta);
} else {
acoef = scale * acoef;
}
if (lsb) {
bcoefr = bscale * (scale * salfar);
} else {
bcoefr = scale * bcoefr;
}
}
acoefa = dabs(acoef);
bcoefa = dabs(bcoefr);
/* First component is 1 */
work[(*n << 1) + je] = 1.f;
xmax = 1.f;
} else {
/* Complex eigenvalue */
r__1 = safmin * 100.f;
slag2_(&a_ref(je, je), lda, &b_ref(je, je), ldb, &r__1, &
acoef, &temp, &bcoefr, &temp2, &bcoefi);
bcoefi = -bcoefi;
if (bcoefi == 0.f) {
*info = je;
return 0;
}
/* Scale to avoid over/underflow */
acoefa = dabs(acoef);
bcoefa = dabs(bcoefr) + dabs(bcoefi);
scale = 1.f;
if (acoefa * ulp < safmin && acoefa >= safmin) {
scale = safmin / ulp / acoefa;
}
if (bcoefa * ulp < safmin && bcoefa >= safmin) {
/* Computing MAX */
r__1 = scale, r__2 = safmin / ulp / bcoefa;
scale = dmax(r__1,r__2);
}
if (safmin * acoefa > ascale) {
scale = ascale / (safmin * acoefa);
}
if (safmin * bcoefa > bscale) {
/* Computing MIN */
r__1 = scale, r__2 = bscale / (safmin * bcoefa);
scale = dmin(r__1,r__2);
}
if (scale != 1.f) {
acoef = scale * acoef;
acoefa = dabs(acoef);
bcoefr = scale * bcoefr;
bcoefi = scale * bcoefi;
bcoefa = dabs(bcoefr) + dabs(bcoefi);
}
/* Compute first two components of eigenvector */
temp = acoef * a_ref(je + 1, je);
temp2r = acoef * a_ref(je, je) - bcoefr * b_ref(je, je);
temp2i = -bcoefi * b_ref(je, je);
if (dabs(temp) > dabs(temp2r) + dabs(temp2i)) {
work[(*n << 1) + je] = 1.f;
work[*n * 3 + je] = 0.f;
work[(*n << 1) + je + 1] = -temp2r / temp;
work[*n * 3 + je + 1] = -temp2i / temp;
} else {
work[(*n << 1) + je + 1] = 1.f;
work[*n * 3 + je + 1] = 0.f;
temp = acoef * a_ref(je, je + 1);
work[(*n << 1) + je] = (bcoefr * b_ref(je + 1, je + 1) -
acoef * a_ref(je + 1, je + 1)) / temp;
work[*n * 3 + je] = bcoefi * b_ref(je + 1, je + 1) / temp;
}
/* Computing MAX */
r__5 = (r__1 = work[(*n << 1) + je], dabs(r__1)) + (r__2 =
work[*n * 3 + je], dabs(r__2)), r__6 = (r__3 = work[(*
n << 1) + je + 1], dabs(r__3)) + (r__4 = work[*n * 3
+ je + 1], dabs(r__4));
xmax = dmax(r__5,r__6);
}
/* Computing MAX */
r__1 = ulp * acoefa * anorm, r__2 = ulp * bcoefa * bnorm, r__1 =
max(r__1,r__2);
dmin__ = dmax(r__1,safmin);
/* T
Triangular solve of (a A - b B) y = 0
T
(rowwise in (a A - b B) , or columnwise in (a A - b B) ) */
il2by2 = FALSE_;
/* ------------------- Begin Timing Code ---------------------- */
opst = 0.f;
in2by2 = 0;
/* -------------------- End Timing Code ----------------------- */
i__2 = *n;
for (j = je + nw; j <= i__2; ++j) {
/* ------------------- Begin Timing Code ------------------- */
opssca = (real) (nw * (j - je) + 1);
/* -------------------- End Timing Code -------------------- */
if (il2by2) {
il2by2 = FALSE_;
goto L160;
}
na = 1;
bdiag[0] = b_ref(j, j);
if (j < *n) {
if (a_ref(j + 1, j) != 0.f) {
il2by2 = TRUE_;
bdiag[1] = b_ref(j + 1, j + 1);
na = 2;
/* ---------------- Begin Timing Code ---------------- */
++in2by2;
/* ----------------- End Timing Code ----------------- */
}
}
/* Check whether scaling is necessary for dot products */
xscale = 1.f / dmax(1.f,xmax);
/* Computing MAX */
r__1 = work[j], r__2 = work[*n + j], r__1 = max(r__1,r__2),
r__2 = acoefa * work[j] + bcoefa * work[*n + j];
temp = dmax(r__1,r__2);
if (il2by2) {
/* Computing MAX */
r__1 = temp, r__2 = work[j + 1], r__1 = max(r__1,r__2),
r__2 = work[*n + j + 1], r__1 = max(r__1,r__2),
r__2 = acoefa * work[j + 1] + bcoefa * work[*n +
j + 1];
temp = dmax(r__1,r__2);
}
if (temp > bignum * xscale) {
i__3 = nw - 1;
for (jw = 0; jw <= i__3; ++jw) {
i__4 = j - 1;
for (jr = je; jr <= i__4; ++jr) {
work[(jw + 2) * *n + jr] = xscale * work[(jw + 2)
* *n + jr];
/* L80: */
}
/* L90: */
}
xmax *= xscale;
/* ------------------ Begin Timing Code ----------------- */
opst += opssca;
/* ------------------- End Timing Code ------------------ */
}
/* Compute dot products
j-1
SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k)
k=je
To reduce the op count, this is done as
_ j-1 _ j-1
a*conjg( sum A(k,j)*x(k) ) - b*conjg( sum B(k,j)*x(k) )
k=je k=je
which may cause underflow problems if A or B are close
to underflow. (E.g., less than SMALL.)
A series of compiler directives to defeat vectorization
for the next loop
$PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__3 = nw;
for (jw = 1; jw <= i__3; ++jw) {
/* $PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__4 = na;
for (ja = 1; ja <= i__4; ++ja) {
suma_ref(ja, jw) = 0.f;
sumb_ref(ja, jw) = 0.f;
i__5 = j - 1;
for (jr = je; jr <= i__5; ++jr) {
suma_ref(ja, jw) = suma_ref(ja, jw) + a_ref(jr, j
+ ja - 1) * work[(jw + 1) * *n + jr];
sumb_ref(ja, jw) = sumb_ref(ja, jw) + b_ref(jr, j
+ ja - 1) * work[(jw + 1) * *n + jr];
/* L100: */
}
/* L110: */
}
/* L120: */
}
/* $PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__3 = na;
for (ja = 1; ja <= i__3; ++ja) {
if (ilcplx) {
sum_ref(ja, 1) = -acoef * suma_ref(ja, 1) + bcoefr *
sumb_ref(ja, 1) - bcoefi * sumb_ref(ja, 2);
sum_ref(ja, 2) = -acoef * suma_ref(ja, 2) + bcoefr *
sumb_ref(ja, 2) + bcoefi * sumb_ref(ja, 1);
} else {
sum_ref(ja, 1) = -acoef * suma_ref(ja, 1) + bcoefr *
sumb_ref(ja, 1);
}
/* L130: */
}
/* T
Solve ( a A - b B ) y = SUM(,)
with scaling and perturbation of the denominator */
slaln2_(&c_true, &na, &nw, &dmin__, &acoef, &a_ref(j, j), lda,
bdiag, &bdiag[1], sum, &c__2, &bcoefr, &bcoefi, &
work[(*n << 1) + j], n, &scale, &temp, &iinfo);
if (scale < 1.f) {
i__3 = nw - 1;
for (jw = 0; jw <= i__3; ++jw) {
i__4 = j - 1;
for (jr = je; jr <= i__4; ++jr) {
work[(jw + 2) * *n + jr] = scale * work[(jw + 2) *
*n + jr];
/* L140: */
}
/* L150: */
}
xmax = scale * xmax;
/* ------------------ Begin Timing Code ----------------- */
opst += opssca;
/* ------------------- End Timing Code ------------------ */
}
xmax = dmax(xmax,temp);
L160:
;
}
/* Copy eigenvector to VL, back transforming if
HOWMNY='B'. */
++ieig;
if (ilback) {
i__2 = nw - 1;
for (jw = 0; jw <= i__2; ++jw) {
i__3 = *n + 1 - je;
sgemv_("N", n, &i__3, &c_b35, &vl_ref(1, je), ldvl, &work[
(jw + 2) * *n + je], &c__1, &c_b37, &work[(jw + 4)
* *n + 1], &c__1);
/* L170: */
}
slacpy_(" ", n, &nw, &work[(*n << 2) + 1], n, &vl_ref(1, je),
ldvl);
ibeg = 1;
} else {
slacpy_(" ", n, &nw, &work[(*n << 1) + 1], n, &vl_ref(1, ieig)
, ldvl);
ibeg = je;
}
/* Scale eigenvector */
xmax = 0.f;
if (ilcplx) {
i__2 = *n;
for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
r__3 = xmax, r__4 = (r__1 = vl_ref(j, ieig), dabs(r__1))
+ (r__2 = vl_ref(j, ieig + 1), dabs(r__2));
xmax = dmax(r__3,r__4);
/* L180: */
}
} else {
i__2 = *n;
for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
r__2 = xmax, r__3 = (r__1 = vl_ref(j, ieig), dabs(r__1));
xmax = dmax(r__2,r__3);
/* L190: */
}
}
if (xmax > safmin) {
xscale = 1.f / xmax;
i__2 = nw - 1;
for (jw = 0; jw <= i__2; ++jw) {
i__3 = *n;
for (jr = ibeg; jr <= i__3; ++jr) {
vl_ref(jr, ieig + jw) = xscale * vl_ref(jr, ieig + jw)
;
/* L200: */
}
/* L210: */
}
}
ieig = ieig + nw - 1;
/* ------------------- Begin Timing Code ----------------------
Opcounts for each eigenvector
Real Complex
Initialization 8--16 71--87
Dot Prod (per iter) 4*NA*(J-JE) + 2 8*NA*(J-JE) + 2
+ 6*NA + scaling + 13*NA + scaling
Solve (per iter) NA*(5 + 7*(NA-1)) NA*(17 + 17*(NA-1))
+ scaling + scaling
Back xform 2*N*(N+1-JE) - N 4*N*(N+1-JE) - 2*N
Scaling (w/back x.) N 3*N
Scaling (w/o back) N - (JE-1) 3*N - 3*(JE-1) */
if (! ilcplx) {
opst += (real) ((*n - je << 1) * (*n + 1 - je) + (*n - je) *
13 + (in2by2 << 3) + 12);
if (ilback) {
opst += (real) ((*n << 1) * (*n + 1 - je));
} else {
opst += (real) (*n + 1 - je);
}
} else {
opst += (real) ((*n - 1 - je << 5) + (*n - je << 2) * (*n + 1
- je) + in2by2 * 24 + 71);
if (ilback) {
opst += (real) ((*n << 2) * (*n + 1 - je) + *n);
} else {
opst += (real) ((*n + 1 - je) * 3);
}
}
latime_1.ops += opst;
/* -------------------- End Timing Code ----------------------- */
L220:
;
}
}
/* Right eigenvectors */
if (compr) {
ieig = im + 1;
/* Main loop over eigenvalues */
ilcplx = FALSE_;
for (je = *n; je >= 1; --je) {
/* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
(b) this would be the second of a complex pair.
Check for complex eigenvalue, so as to be sure of which
entry(-ies) of SELECT to look at -- if complex, SELECT(JE)
or SELECT(JE-1).
If this is a complex pair, the 2-by-2 diagonal block
corresponding to the eigenvalue is in rows/columns JE-1:JE */
if (ilcplx) {
ilcplx = FALSE_;
goto L500;
}
nw = 1;
if (je > 1) {
if (a_ref(je, je - 1) != 0.f) {
ilcplx = TRUE_;
nw = 2;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -