eigensolver.c
/* Copyright (C) 1999, 2000, 2001, 2002, Massachusetts Institute of Technology.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#include "../config.h"
#include <mpiglue.h>
#include <mpi_utils.h>
#include <check.h>

#include <scalar.h>
#include <matrices.h>
#include <blasglue.h>

#include "eigensolver.h"
#include "linmin.h"

extern void eigensolver_get_eigenvals_aux(evectmatrix Y, real *eigenvals,
                                          evectoperator A, void *Adata,
                                          evectmatrix Work1, evectmatrix Work2,
                                          sqmatrix U, sqmatrix Usqrt,
                                          sqmatrix Uwork);

#define STRINGIZEx(x) #x /* a hack so that we can stringize macro values */
#define STRINGIZE(x) STRINGIZEx(x)
#define K_PI 3.141592653589793238462643383279502884197

#define MIN2(a,b) ((a) < (b) ? (a) : (b))
#define MAX2(a,b) ((a) > (b) ? (a) : (b))

/* Evaluate op, and set t to the elapsed time (in seconds). */
#define TIME_OP(t, op) { \
     mpiglue_clock_t xxx_time_op_start_time = MPIGLUE_CLOCK; \
     { \
          op; \
     } \
     (t) = MPIGLUE_CLOCK_DIFF(MPIGLUE_CLOCK, xxx_time_op_start_time); \
}

/**************************************************************************/

#define EIGENSOLVER_MAX_ITERATIONS 10000
#define FEEDBACK_TIME 4.0 /* elapsed time before we print progress feedback */

/* Number of iterations after which to reset conjugate gradient
   direction to steepest descent.  (Picked after some experimentation.
   Is there a better basis?  Should this change with the problem size?) */
#define CG_RESET_ITERS 70

/* Threshold for trace(1/YtY) = trace(U) before we reorthogonalize: */
#define EIGS_TRACE_U_THRESHOLD 1e8

/**************************************************************************/

/* estimated times/iteration for different iteration schemes, based on
   the measured times for various operations and the operation counts: */

#define EXACT_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ, t_linmin) \
     ((t_AZ)*2 + (t_KZ) + (t_ZtW)*4 + (t_ZS)*2 + (t_ZtZ)*2 + (t_linmin))

#define APPROX_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ) \
     ((t_AZ)*2 + (t_KZ) + (t_ZtW)*2 + (t_ZS)*2 + (t_ZtZ)*2)

/* Guess for the convergence slowdown factor due to the approximate
   line minimization.  It is probably best to be conservative, as the
   exact line minimization is more reliable and we only want to
   abandon it if there is a big speed gain. */
#define APPROX_LINMIN_SLOWDOWN_GUESS 2.0

/* We also don't want to use the approximate line minimization if
   the exact line minimization makes a big difference in the value
   of the trace that's achieved (i.e. if one step of Newton's method
   on the trace derivative does not do a good job).  The following is
   the maximum improvement by the exact line minimization (over one
   step of Newton) at which we'll allow the use of approximate line
   minimization. */
#define APPROX_LINMIN_IMPROVEMENT_THRESHOLD 0.05
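
/* Illustrative sketch: one way the timing estimates above could be
   combined to decide whether the approximate line minimization is
   worth using.  The helper below is hypothetical (an assumption added
   for exposition, disabled by #if 0); it only restates the heuristic
   described in the comments above: prefer the approximate scheme when
   it still wins on time per iteration after the assumed convergence
   slowdown, and when the exact minimization is not improving the trace
   much beyond one Newton step. */
#if 0
static int approx_linmin_worthwhile(real t_AZ, real t_KZ, real t_ZtW,
                                    real t_ZS, real t_ZtZ, real t_linmin,
                                    real linmin_improvement)
{
     real t_exact = EXACT_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ,
                                      t_linmin);
     real t_approx = APPROX_LINMIN_TIME(t_AZ, t_KZ, t_ZtW, t_ZS, t_ZtZ);
     /* big speed gain AND exact linmin buys little extra trace reduction */
     return (t_exact > APPROX_LINMIN_SLOWDOWN_GUESS * t_approx
             && linmin_improvement <= APPROX_LINMIN_IMPROVEMENT_THRESHOLD);
}
#endif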

/**************************************************************************/

typedef struct {
     sqmatrix YtAY, DtAD, symYtAD, YtY, DtD, symYtD, S1, S2, S3;
} trace_func_data;

static double trace_func(double theta, double *trace_deriv, void *data)
{
     double trace;
     trace_func_data *d = (trace_func_data *) data;

     {
          double c = cos(theta), s = sin(theta);

          sqmatrix_copy(d->S1, d->YtY);
          sqmatrix_aApbB(c*c, d->S1, s*s, d->DtD);
          sqmatrix_ApaB(d->S1, 2*s*c, d->symYtD);
          sqmatrix_invert(d->S1, 1, d->S2);

          sqmatrix_copy(d->S2, d->YtAY);
          sqmatrix_aApbB(c*c, d->S2, s*s, d->DtAD);
          sqmatrix_ApaB(d->S2, 2*s*c, d->symYtAD);

          trace = SCALAR_RE(sqmatrix_traceAtB(d->S2, d->S1));
     }

     if (trace_deriv) {
          double c2 = cos(2*theta), s2 = sin(2*theta);

          sqmatrix_copy(d->S3, d->YtAY);
          sqmatrix_ApaB(d->S3, -1.0, d->DtAD);
          sqmatrix_aApbB(-0.5 * s2, d->S3, c2, d->symYtAD);

          *trace_deriv = SCALAR_RE(sqmatrix_traceAtB(d->S1, d->S3));

          sqmatrix_AeBC(d->S3, d->S1, 0, d->S2, 1);
          sqmatrix_AeBC(d->S2, d->S3, 0, d->S1, 1);

          sqmatrix_copy(d->S3, d->YtY);
          sqmatrix_ApaB(d->S3, -1.0, d->DtD);
          sqmatrix_aApbB(-0.5 * s2, d->S3, c2, d->symYtD);

          *trace_deriv -= SCALAR_RE(sqmatrix_traceAtB(d->S2, d->S3));
          *trace_deriv *= 2;
     }

     return trace;
}
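
/* Note on trace_func above: schematically, for Z(theta) = Y cos(theta)
   + D sin(theta), it evaluates the trace of the generalized Rayleigh
   quotient (with Xt denoting adjoint(X)):

        trace(theta) = Re tr[ (ZtAZ) (ZtZ)^(-1) ],

   where the two quadratic forms expand to

        ZtAZ = cos^2 YtAY + sin^2 DtAD + 2 sin cos sym(YtAD),
        ZtZ  = cos^2 YtY  + sin^2 DtD  + 2 sin cos sym(YtD).

   The trace_deriv branch differentiates these forms with respect to
   theta (hence the cos(2 theta) / sin(2 theta) terms) and uses

        d(trace)/d(theta) = tr[ (ZtZ)^(-1) d(ZtAZ)/d(theta) ]
                          - tr[ (ZtZ)^(-1) (ZtAZ) (ZtZ)^(-1) d(ZtZ)/d(theta) ]. */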

/**************************************************************************/

#define EIG_HISTORY_SIZE 5

void eigensolver(evectmatrix Y, real *eigenvals,
                 evectoperator A, void *Adata,
                 evectpreconditioner K, void *Kdata,
                 evectconstraint constraint, void *constraint_data,
                 evectmatrix Work[], int nWork,
                 real tolerance, int *num_iterations,
                 int flags)
{
     real convergence_history[EIG_HISTORY_SIZE];
     evectmatrix G, D, X, prev_G;
     short usingConjugateGradient = 0, use_polak_ribiere = 0, use_linmin = 1;
     real E, prev_E = 0.0;
     real d_scale = 1.0;
     real traceGtX, prev_traceGtX = 0.0;
     real theta, prev_theta = 0.5;
     int i, iteration = 0;
     mpiglue_clock_t prev_feedback_time;
     real time_AZ, time_KZ=0, time_ZtZ, time_ZtW, time_ZS, time_linmin=0;
     real linmin_improvement = 0;
     sqmatrix YtAYU, DtAD, symYtAD, YtY, U, DtD, symYtD, S1, S2, S3;
     trace_func_data tfd;

     prev_feedback_time = MPIGLUE_CLOCK;

#ifdef DEBUG
     flags |= EIGS_VERBOSE;
#endif

     CHECK(nWork >= 2, "not enough workspace");
     G = Work[0];
     X = Work[1];

     usingConjugateGradient = nWork >= 3;
     if (usingConjugateGradient) {
          D = Work[2];
          for (i = 0; i < D.n * D.p; ++i)
               ASSIGN_ZERO(D.data[i]);
     }
     else
          D = X;

     use_polak_ribiere = nWork >= 4;
     if (use_polak_ribiere) {
          prev_G = Work[3];
          for (i = 0; i < Y.n * Y.p; ++i)
               ASSIGN_ZERO(prev_G.data[i]);
          if (flags & EIGS_ORTHOGONAL_PRECONDITIONER) /* see below */
               fprintf(stderr, "WARNING: Polak-Ribiere may not work with the "
                       "orthogonal-preconditioner option.\n");
     }
     else
          prev_G = G;

     YtAYU = create_sqmatrix(Y.p);  /* holds Yt A Y */
     DtAD = create_sqmatrix(Y.p);  /* holds Dt A D */
     symYtAD = create_sqmatrix(Y.p);  /* holds (Yt A D + Dt A Y) / 2 */
     YtY = create_sqmatrix(Y.p);  /* holds Yt Y */
     U = create_sqmatrix(Y.p);  /* holds 1 / (Yt Y) */
     DtD = create_sqmatrix(Y.p);  /* holds Dt D */
     symYtD = create_sqmatrix(Y.p);  /* holds (Yt D + Dt Y) / 2 */

     /* Notation note: "t" represents a dagger superscript, so Yt
        represents adjoint(Y), or Y' in MATLAB syntax. */

     /* scratch matrices: */
     S1 = create_sqmatrix(Y.p);
     S2 = create_sqmatrix(Y.p);
     S3 = create_sqmatrix(Y.p);

     tfd.YtAY = S1; tfd.DtAD = DtAD; tfd.symYtAD = symYtAD;
     tfd.YtY = YtY; tfd.DtD = DtD; tfd.symYtD = symYtD;
     tfd.S1 = YtAYU; tfd.S2 = S2; tfd.S3 = S3;

     if (flags & EIGS_ORTHONORMALIZE_FIRST_STEP) {
          evectmatrix_XtX(U, Y, S2);
          sqmatrix_invert(U, 1, S2);
          sqmatrix_sqrt(S1, U, S2); /* S1 = 1/sqrt(Yt*Y) */
          evectmatrix_XeYS(G, Y, S1, 1); /* G = orthonormalize Y */
          evectmatrix_copy(Y, G);
     }

     for (i = 0; i < Y.p; ++i)
          eigenvals[i] = 0.0;

     for (i = 0; i < EIG_HISTORY_SIZE; ++i)
          convergence_history[i] = 10000.0;

     if (constraint)
          constraint(Y, constraint_data);

     do {
          real y_norm, gamma_numerator = 0;

          if (flags & EIGS_FORCE_APPROX_LINMIN)
               use_linmin = 0;

          TIME_OP(time_ZtZ, evectmatrix_XtX(YtY, Y, S2));

          y_norm = sqrt(SCALAR_RE(sqmatrix_trace(YtY)) / Y.p);
          blasglue_rscal(Y.p * Y.n, 1/y_norm, Y.data, 1);
          blasglue_rscal(Y.p * Y.p, 1/(y_norm*y_norm), YtY.data, 1);

          sqmatrix_copy(U, YtY);
          sqmatrix_invert(U, 1, S2);

          /* If trace(1/YtY) gets big, it means that the columns of Y
             are becoming nearly parallel.  This sometimes happens,
             especially in the targeted eigensolver, because the
             preconditioner pushes all the columns towards the ground
             state.  If it gets too big, it seems to be a good idea to
             re-orthogonalize, resetting conjugate-gradient, as
             otherwise we start to encounter numerical problems. */
          if (flags & EIGS_REORTHOGONALIZE) {
               real traceU = SCALAR_RE(sqmatrix_trace(U));
               mpi_assert_equal(traceU);
               if (traceU > EIGS_TRACE_U_THRESHOLD * U.p) {
                    mpi_one_printf(" re-orthonormalizing Y\n");
                    sqmatrix_sqrt(S1, U, S2); /* S1 = 1/sqrt(Yt*Y) */
                    evectmatrix_XeYS(G, Y, S1, 1); /* G = orthonormalize Y */
                    evectmatrix_copy(Y, G);
                    prev_traceGtX = 0.0;

                    evectmatrix_XtX(YtY, Y, S2);
                    y_norm = sqrt(SCALAR_RE(sqmatrix_trace(YtY)) / Y.p);
                    blasglue_rscal(Y.p * Y.n, 1/y_norm, Y.data, 1);
                    blasglue_rscal(Y.p * Y.p, 1/(y_norm*y_norm), YtY.data, 1);
                    sqmatrix_copy(U, YtY);
                    sqmatrix_invert(U, 1, S2);
               }
          }

          TIME_OP(time_AZ, A(Y, X, Adata, 1, G)); /* X = AY; G is scratch */

          /* G = AYU; note that U is Hermitian: */
          TIME_OP(time_ZS, evectmatrix_XeYS(G, X, U, 1));

          TIME_OP(time_ZtW, evectmatrix_XtY(YtAYU, Y, G, S2));
          E = SCALAR_RE(sqmatrix_trace(YtAYU));
          CHECK(!BADNUM(E), "crazy number detected in trace!!\n");
          mpi_assert_equal(E);

          convergence_history[iteration % EIG_HISTORY_SIZE] =
               200.0 * fabs(E - prev_E) / (fabs(E) + fabs(prev_E));

          if (iteration > 0 && mpi_is_master() &&
              ((flags & EIGS_VERBOSE) ||
               MPIGLUE_CLOCK_DIFF(MPIGLUE_CLOCK, prev_feedback_time)
               > FEEDBACK_TIME)) {
               printf(" iteration %4d: "
                      "trace = %0.16g (%g%% change)\n", iteration, E,
                      convergence_history[iteration % EIG_HISTORY_SIZE]);
               if (flags & EIGS_VERBOSE)
                    debug_output_malloc_count();
               fflush(stdout); /* make sure output appears */
               prev_feedback_time = MPIGLUE_CLOCK; /* reset feedback clock */
          }

          if (iteration > 0 &&
              fabs(E - prev_E) < tolerance * 0.5 * (E + prev_E + 1e-7))
               break; /* convergence!  hooray! */

          /* Compute gradient of functional: G = (1 - Y U Yt) A Y U */
          sqmatrix_AeBC(S1, U, 0, YtAYU, 0);
          evectmatrix_XpaYS(G, -1.0, Y, S1, 1);

          /* set X = precondition(G): */
          if (K != NULL) {
               TIME_OP(time_KZ, K(G, X, Kdata, Y, NULL, YtY));
               /* Note: we passed NULL for eigenvals since we haven't
                  diagonalized YAY (nor are the Y's orthonormal). */
          }
          else
               evectmatrix_copy(X, G);  /* preconditioner is identity */

          /* We have to apply the constraint here, in case it doesn't
             commute with the preconditioner. */
          if (constraint)