📄 dtgevc.c
字号:
/* L80: */
}
/* L90: */
}
xmax *= xscale;
/* ------------------ Begin Timing Code ----------------- */
opst += opssca;
/* ------------------- End Timing Code ------------------ */
}
/* Compute dot products
j-1
SUM = sum conjg( a*A(k,j) - b*B(k,j) )*x(k)
k=je
To reduce the op count, this is done as
_ j-1 _ j-1
a*conjg( sum A(k,j)*x(k) ) - b*conjg( sum B(k,j)*x(k) )
k=je k=je
which may cause underflow problems if A or B are close
to underflow. (E.g., less than SMALL.)
A series of compiler directives to defeat vectorization
for the next loop
$PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__3 = nw;
for (jw = 1; jw <= i__3; ++jw) {
/* $PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__4 = na;
for (ja = 1; ja <= i__4; ++ja) {
suma_ref(ja, jw) = 0.;
sumb_ref(ja, jw) = 0.;
i__5 = j - 1;
for (jr = je; jr <= i__5; ++jr) {
suma_ref(ja, jw) = suma_ref(ja, jw) + a_ref(jr, j
+ ja - 1) * work[(jw + 1) * *n + jr];
sumb_ref(ja, jw) = sumb_ref(ja, jw) + b_ref(jr, j
+ ja - 1) * work[(jw + 1) * *n + jr];
/* L100: */
}
/* L110: */
}
/* L120: */
}
/* $PL$ CMCHAR=' '
DIR$ NEXTSCALAR
$DIR SCALAR
DIR$ NEXT SCALAR
VD$L NOVECTOR
DEC$ NOVECTOR
VD$ NOVECTOR
VDIR NOVECTOR
VOCL LOOP,SCALAR
IBM PREFER SCALAR
$PL$ CMCHAR='*' */
i__3 = na;
for (ja = 1; ja <= i__3; ++ja) {
if (ilcplx) {
sum_ref(ja, 1) = -acoef * suma_ref(ja, 1) + bcoefr *
sumb_ref(ja, 1) - bcoefi * sumb_ref(ja, 2);
sum_ref(ja, 2) = -acoef * suma_ref(ja, 2) + bcoefr *
sumb_ref(ja, 2) + bcoefi * sumb_ref(ja, 1);
} else {
sum_ref(ja, 1) = -acoef * suma_ref(ja, 1) + bcoefr *
sumb_ref(ja, 1);
}
/* L130: */
}
/* T
Solve ( a A - b B ) y = SUM(,)
with scaling and perturbation of the denominator */
dlaln2_(&c_true, &na, &nw, &dmin__, &acoef, &a_ref(j, j), lda,
bdiag, &bdiag[1], sum, &c__2, &bcoefr, &bcoefi, &
work[(*n << 1) + j], n, &scale, &temp, &iinfo);
if (scale < 1.) {
i__3 = nw - 1;
for (jw = 0; jw <= i__3; ++jw) {
i__4 = j - 1;
for (jr = je; jr <= i__4; ++jr) {
work[(jw + 2) * *n + jr] = scale * work[(jw + 2) *
*n + jr];
/* L140: */
}
/* L150: */
}
xmax = scale * xmax;
/* ------------------ Begin Timing Code ----------------- */
opst += opssca;
/* ------------------- End Timing Code ------------------ */
}
xmax = max(xmax,temp);
L160:
;
}
/* Copy eigenvector to VL, back transforming if
HOWMNY='B'. */
++ieig;
if (ilback) {
i__2 = nw - 1;
for (jw = 0; jw <= i__2; ++jw) {
i__3 = *n + 1 - je;
dgemv_("N", n, &i__3, &c_b35, &vl_ref(1, je), ldvl, &work[
(jw + 2) * *n + je], &c__1, &c_b37, &work[(jw + 4)
* *n + 1], &c__1);
/* L170: */
}
dlacpy_(" ", n, &nw, &work[(*n << 2) + 1], n, &vl_ref(1, je),
ldvl);
ibeg = 1;
} else {
dlacpy_(" ", n, &nw, &work[(*n << 1) + 1], n, &vl_ref(1, ieig)
, ldvl);
ibeg = je;
}
/* Scale eigenvector */
xmax = 0.;
if (ilcplx) {
i__2 = *n;
for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
d__3 = xmax, d__4 = (d__1 = vl_ref(j, ieig), abs(d__1)) +
(d__2 = vl_ref(j, ieig + 1), abs(d__2));
xmax = max(d__3,d__4);
/* L180: */
}
} else {
i__2 = *n;
for (j = ibeg; j <= i__2; ++j) {
/* Computing MAX */
d__2 = xmax, d__3 = (d__1 = vl_ref(j, ieig), abs(d__1));
xmax = max(d__2,d__3);
/* L190: */
}
}
if (xmax > safmin) {
xscale = 1. / xmax;
i__2 = nw - 1;
for (jw = 0; jw <= i__2; ++jw) {
i__3 = *n;
for (jr = ibeg; jr <= i__3; ++jr) {
vl_ref(jr, ieig + jw) = xscale * vl_ref(jr, ieig + jw)
;
/* L200: */
}
/* L210: */
}
}
ieig = ieig + nw - 1;
/* ------------------- Begin Timing Code ----------------------
Opcounts for each eigenvector
Real Complex
Initialization 8--16 71--87
Dot Prod (per iter) 4*NA*(J-JE) + 2 8*NA*(J-JE) + 2
+ 6*NA + scaling + 13*NA + scaling
Solve (per iter) NA*(5 + 7*(NA-1)) NA*(17 + 17*(NA-1))
+ scaling + scaling
Back xform 2*N*(N+1-JE) - N 4*N*(N+1-JE) - 2*N
Scaling (w/back x.) N 3*N
Scaling (w/o back) N - (JE-1) 3*N - 3*(JE-1) */
if (! ilcplx) {
opst += (doublereal) ((*n - je << 1) * (*n + 1 - je) + (*n -
je) * 13 + (in2by2 << 3) + 12);
if (ilback) {
opst += (doublereal) ((*n << 1) * (*n + 1 - je));
} else {
opst += (doublereal) (*n + 1 - je);
}
} else {
opst += (doublereal) ((*n - 1 - je << 5) + (*n - je << 2) * (*
n + 1 - je) + in2by2 * 24 + 71);
if (ilback) {
opst += (doublereal) ((*n << 2) * (*n + 1 - je) + *n);
} else {
opst += (doublereal) ((*n + 1 - je) * 3);
}
}
latime_1.ops += opst;
/* -------------------- End Timing Code ----------------------- */
L220:
;
}
}
/* Right eigenvectors */
if (compr) {
ieig = im + 1;
/* Main loop over eigenvalues */
ilcplx = FALSE_;
for (je = *n; je >= 1; --je) {
/* Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
(b) this would be the second of a complex pair.
Check for complex eigenvalue, so as to be sure of which
entry(-ies) of SELECT to look at -- if complex, SELECT(JE)
or SELECT(JE-1).
If this is a complex pair, the 2-by-2 diagonal block
corresponding to the eigenvalue is in rows/columns JE-1:JE */
if (ilcplx) {
ilcplx = FALSE_;
goto L500;
}
nw = 1;
if (je > 1) {
if (a_ref(je, je - 1) != 0.) {
ilcplx = TRUE_;
nw = 2;
}
}
if (ilall) {
ilcomp = TRUE_;
} else if (ilcplx) {
ilcomp = select[je] || select[je - 1];
} else {
ilcomp = select[je];
}
if (! ilcomp) {
goto L500;
}
/* Decide if (a) singular pencil, (b) real eigenvalue, or
(c) complex eigenvalue. */
if (! ilcplx) {
if ((d__1 = a_ref(je, je), abs(d__1)) <= safmin && (d__2 =
b_ref(je, je), abs(d__2)) <= safmin) {
/* Singular matrix pencil -- returns unit eigenvector */
--ieig;
i__1 = *n;
for (jr = 1; jr <= i__1; ++jr) {
vr_ref(jr, ieig) = 0.;
/* L230: */
}
vr_ref(ieig, ieig) = 1.;
goto L500;
}
}
/* Clear vector */
i__1 = nw - 1;
for (jw = 0; jw <= i__1; ++jw) {
i__2 = *n;
for (jr = 1; jr <= i__2; ++jr) {
work[(jw + 2) * *n + jr] = 0.;
/* L240: */
}
/* L250: */
}
/* Compute coefficients in ( a A - b B ) x = 0
a is ACOEF
b is BCOEFR + i*BCOEFI */
if (! ilcplx) {
/* Real eigenvalue
Computing MAX */
d__3 = (d__1 = a_ref(je, je), abs(d__1)) * ascale, d__4 = (
d__2 = b_ref(je, je), abs(d__2)) * bscale, d__3 = max(
d__3,d__4);
temp = 1. / max(d__3,safmin);
salfar = temp * a_ref(je, je) * ascale;
sbeta = temp * b_ref(je, je) * bscale;
acoef = sbeta * ascale;
bcoefr = salfar * bscale;
bcoefi = 0.;
/* Scale to avoid underflow */
scale = 1.;
lsa = abs(sbeta) >= safmin && abs(acoef) < small;
lsb = abs(salfar) >= safmin && abs(bcoefr) < small;
if (lsa) {
scale = small / abs(sbeta) * min(anorm,big);
}
if (lsb) {
/* Computing MAX */
d__1 = scale, d__2 = small / abs(salfar) * min(bnorm,big);
scale = max(d__1,d__2);
}
if (lsa || lsb) {
/* Computing MIN
Computing MAX */
d__3 = 1., d__4 = abs(acoef), d__3 = max(d__3,d__4), d__4
= abs(bcoefr);
d__1 = scale, d__2 = 1. / (safmin * max(d__3,d__4));
scale = min(d__1,d__2);
if (lsa) {
acoef = ascale * (scale * sbeta);
} else {
acoef = scale * acoef;
}
if (lsb) {
bcoefr = bscale * (scale * salfar);
} else {
bcoefr = scale * bcoefr;
}
}
acoefa = abs(acoef);
bcoefa = abs(bcoefr);
/* First component is 1 */
work[(*n << 1) + je] = 1.;
xmax = 1.;
/* Compute contribution from column JE of A and B to sum
(See "Further Details", above.) */
i__1 = je - 1;
for (jr = 1; jr <= i__1; ++jr) {
work[(*n << 1) + jr] = bcoefr * b_ref(jr, je) - acoef *
a_ref(jr, je);
/* L260: */
}
} else {
/* Complex eigenvalue */
d__1 = safmin * 100.;
dlag2_(&a_ref(je - 1, je - 1), lda, &b_ref(je - 1, je - 1),
ldb, &d__1, &acoef, &temp, &bcoefr, &temp2, &bcoefi);
if (bcoefi == 0.) {
*info = je - 1;
return 0;
}
/* Scale to avoid over/underflow */
acoefa = abs(acoef);
bcoefa = abs(bcoefr) + abs(bcoefi);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -