/* cg.c
 *
 * NOTE(review): the lines that were here were navigation chrome from the
 * web page this file was scraped from, not C source.  The original caption,
 * translated from Chinese, read: "from 'FFT program code, signal-processing
 * students take note' - C code - 1,027 lines - page 1 of 2".  (The caption's
 * FFT title does not match the content, which is an optimizer.)
 */
      return;  /* NOTE(review): this chunk begins mid-function -- what follows
                  is the tail of the DFPMIN2 quasi-Newton minimizer (name taken
                  from the messages below); its header is not visible here. */
    }
    fp=(*fret);                          /* remember previous function value */
    for (i=1;i<=n;i++) dg[i]=g[i];       /* save the old gradient in dg */
    *fret=(*func)(p,func_arg);           /* function value at the new point */
    (*dfunc)(p,g,dfunc_arg);             /* gradient at the new point */
    for (i=1;i<=n;i++) dg[i]=g[i]-dg[i]; /* dg := new gradient - old gradient */
    for (i=1;i<=n;i++) {                 /* hdg := hessin . dg */
      hdg[i]=0.0;
      for (j=1;j<=n;j++) hdg[i] += hessin[i][j]*dg[j];
    }
    fac=fae=0.0;
    for (i=1;i<=n;i++) {
      fac += dg[i]*xi[i];                /* fac = dg . xi */
      fae += dg[i]*hdg[i];               /* fae = dg . hessin . dg */
    }
    fac=1.0/fac;
    fad=1.0/fae;
    /* NOTE(review): no guard against a zero (or tiny) fac/fae denominator
       here -- that would produce inf/nan; presumably rare in practice. */
    for (i=1;i<=n;i++) dg[i]=fac*xi[i]-fad*hdg[i];
    /* Rank-two update of the inverse-Hessian approximation.  The form
       (fac*xi.xi - fad*hdg.hdg + fae*dg.dg, with dg redefined just above)
       appears to be the BFGS update as written in Numerical Recipes'
       dfpmin -- TODO confirm against the missing function header. */
    for (i=1;i<=n;i++)
      for (j=1;j<=n;j++)
	hessin[i][j] += fac*xi[i]*xi[j]
	  -fad*hdg[i]*hdg[j]+fae*dg[i]*dg[j];
    for (i=1;i<=n;i++) {       /* next search direction: xi = -hessin . g */
      xi[i]=0.0;
      for (j=1;j<=n;j++) xi[i] -= hessin[i][j]*g[j];
    }
  }
  /*	nrerror("Too many iterations in DFPMIN2"); */
  printf("Too many iterations in DFPMIN2, but continuing.\n");
  free_dvector(hdg,1,n);
  free_dvector(dg,1,n);
  free_dvector(g,1,n);
  free_dvector(xi,1,n);
  return;
}
#undef ITMAX
#undef EPS

/* checkgrad.c */
/* Examines objective function and d_objective function to see if 
   they agree for a step of size epsilon */
/* For each coordinate j this prints the analytic gradient g[j] next to the
   one-sided finite difference (f(p + epsilon*e_j) - f1) / epsilon, where
   f1 = f(p).  Each p[j] is perturbed in place and then restored.
   Vectors are 1-indexed (NR-style dvector). */
void checkgrad
  (double *p,
   int    n,
   double epsilon,
   double (*func)(double *, void *),
   void   *func_arg,
   void   (*dfunc)(double *,double *, void *),
   void   *dfunc_arg
){
  int j;
  double f1;
  double *g,*h;

  h=dvector(1,n);                /* h[j] holds the raw function difference */
  g=dvector(1,n);                /* g holds the analytic gradient */
  f1=(*func)(p,func_arg);
  (*dfunc)(p,g,dfunc_arg);
  printf("Testing gradient evaluation\n");
  printf("      analytic     1st_diffs\n");
  for ( j = 1 ; j <= n ; j ++ ) {
    p[j] += epsilon ;
    h[j] = (*func)(p,func_arg) - f1 ;
    p[j] -= epsilon ;            /* restore p[j] before moving on */
    printf("%2d %9.5g %9.5g\n" , j , g[j] , h[j]/epsilon );
    fflush(stdout) ;             /* flush so progress is visible per row */
  }
  free_dvector(h,1,n);
  free_dvector(g,1,n);
}

/* NOTE(review): the routine below is commented out in the original source
   (a disabled backtracking line search); kept verbatim. */
/*
static int lnsrch(double *fret, double *xmin, 
		     double (*fn1d)( double * , void * ),
		     void   (*dfn1d)(double *, double * , void * ),
		     double n_over_grad)
{
  double f0, df0, f1, f2, f_target, y1, y2, mu, nu, a;
  double l1, l2, l_min = 0.1 , l_max = 0.5 ;
  int      i, status;
  int      done = 0;
  double offset = 0;
  double scale  = 
0;  */ /* Pick a scale size (small if it's very hilly) */ /*  if (n_over_grad > 1.0)    scale = n_over_grad * global->lnsrch_max_step;  else    scale = global->lnsrch_max_step;  */ /* Never go further than the Newton step though */ /*  if (scale > 1.0)    scale = 1.0;  */ /* Get the gradient for starting point */ /*  status = (*dfn1d)(offset, &f0, &df0);  df0 = df0 * scale;  */ /* Check that +ve lambda sends us downhill */ /*  if (df0 > 0)    {      scale = -scale;      df0   = -df0;    }  */ /* Try the full Newton jump */ /*  l1 = 1;  f1 =  (*fn1d)(l1 * scale + offset, );  f_target = f0 + global->lnsrch_alpha * df0 * l1;  if (f1 < f_target)    {      *fret = f1;      *xmin = l1 * scale + offset;      return 0;    }    */ /* Now try a quadratic approx */ /*  l_min = global->lnsrch_min_lambda;  l2 = - df0 / (2 * (f1 - f0 - df0));  if (l2 < l_min)    {      l2   = l_min;      done = 1;    }  f2  = (*fn1d)(l2 * scale + offset, );  f_target = f0 + global->lnsrch_alpha * df0 * l2;  if ( (f2 < f_target) || (done) )     {      *fret = f2;      *xmin = l2 * scale + offset;      return 0;    }    */ /* OK we're in cubicville AZ */ /*  y1 = f1 - f0 - l1 * df0;  y2 = f2 - f0 - l2 * df0;    for(i = 0; i < global->lnsrch_max_iters; i++)    {      mu = ( (l2 * l2 * l2 * y1 - l1 * l1 * l1 * y2) / 	    (l2 * l2 * y1 - l1 * l1 * y2));      a  = (l1 * l1 * l2 * l2 * (l1 - l2)) / (l2 * l2 * y1 - l1 * l1 * y2);            nu = mu * mu - 3 * df0 * a;      if (nu < 0)	{	  fprintf( stderr , "lnsrch: Error (4) Probable roundoff error\n");	  *fret = f2;	  *xmin = l2 * scale + offset;	  return -4;	}            l_min = global->lnsrch_min_lambda * l1;      l_max = global->lnsrch_max_lambda * l1;      l1 = l2;      f1 = f2;      y1 = y2;      l2 = (mu + sqrt(nu)) / 3.0;      if (l2 < l_min)	{	  l2   = l_min;	  done = 1;	}      else if (l2 > l_max)	{	  l2 = l_max;	}      f2 = (*fn1d)(l2 * scale + offset, );            f_target = f0 + global->lnsrch_alpha * df0 * l2;      if ( (f2 < f_target) || 
(done) )	{	  *fret = f2;	  *xmin = l2 * scale + offset;	  return 0;	}        y2 = f2 - f0 - l2 * df0;    }  fprintf( stderr , "lnsrch: Error (5) Too many iterations - %12.6g at %12.6g\n",	    f2, l2 * scale + offset);  *xmin = l2 * scale + offset;  *fret = f2;    return -5;}*//*    David MacKay's optimizer, based on conjugate gradient ideas,    but using bracketing of the zero of the inner product              (gradient).(line_search_direction)   to do the line minimization. Only derivative calculations are required.   The length of the first step in the line search (often set to "1.0"   in other code) is adapted here so that, if 0.00001 is a better step size,    it soon cottons on to that and saves ~log(10000) bracketing operations.   The result is that (with rich set to 0) the program can use    as few as 2 derivatives per line search. (If rich is set to 1, it does    an extra derivative calculation at the beginning of each line search    making a minimum of 3 per line search. Set rich=0 if you think    that the surface is locally quite quadratic.) If the program does average    2 derivatives per line search then it must be superior to most cg methods    including use of Rbackprop (which costs 2 derivatives straight off)   A possible modification: where the function can be returned at same    time as the dfunction --- there is nothing clever to do with the    value, but it could be used as a sanity check and a convergence criterion.    See end of file for further discussion.   NB: The value of "tol" is totally arbitrary and must be set by    you to a value that works well for your problem.    It depends completely on the typical value of the gradient / step size.    Tol specifies a magnitude of gradient at which a halt is called.    
or a step size.*/

#define EPS 1.0e-10
/* NOTE(review): EPS is defined here but never referenced before the matching
   #undef after macopt -- it appears to be unused in this block. */

/* FREEALL releases all six scratch vectors allocated by macopt.
   NOTE(review): it expands to several bare statements with no do{}while(0)
   wrapper, so it is only safe where a plain statement list is legal -- which
   is true at every call site below, but beware if reused. */
#define FREEALL free_dvector(xi,1,n);free_dvector(h,1,n);free_dvector(g,1,n);  free_dvector ( pt , 1 , n ) ;   free_dvector ( gx , 1 , n ) ;   free_dvector ( gy , 1 , n ) ; 

void macopt
  (double *p,            /* starting vector                                */
   int    n,             /* number of dimensions                           */
   int    type,          /* communicates what sort of opt to do            */
                         /* if type is even (eg 0) then no fresh gradient 
			    is evaluated at the start of each line search, 
			    reducing the number of gradients 
		            by one per loop; `rich' is defined to be
			    type % 2                                       
			    if type/10, then tol defines 
			    a step length criterion. 
			    else tol defines a gradient magnitude criterion */
   double tol,    /* convergence declared when the gradient vector is smaller
		     in magnitude than this, or when the mean absolute 
		     step is less than this (see above) */
   int    *its,   /* or when the number of iterations                      */
   int    itmax,  /* reaches itmax                                         */
   void   (*dfunc)(double *,double *, void *), /* evaluates the gradient   */
   void   *dfunc_arg
   )                     /* Note, (*func)(double *,void *) is not used     */
{
  int j ;
  double gg,gam,dgg;
  double *g,*h,*xi;
  int rich = type % 2 ;
  int end_if_small_step = type / 10 ;
  int end_if_small_grad = ( end_if_small_step ) ? 0 : 1 ;
  double step ;
  double *pt , *gx , *gy ;         /* vectors for linmin to use */

  /* A total of 7 double * 1..n are used by this optimizer.
     p           is provided when the optimizer is called
     pt          is used by the line minimizer as the temporary vector.
                    this could be cut out with minor rewriting, using p alone
     g, h and xi are used by the cg method as in NR - could one of these
                    be cut out?
     the line minimizer uses an extra gx and gy to evaluate two gradients.
  */

#define MACOPTVERBOSE 2

  g = dvector ( 1 , n ) ;
  h = dvector ( 1 , n ) ;
  xi = dvector ( 1 , n ) ;
  pt = dvector(1,n) ; /* scratch vector */
  gx = dvector(1,n) ; /* scratch gradient */
  gy = dvector(1,n) ; /* scratch gradient */

  (*dfunc)(p,xi,dfunc_arg);
  /* initialize: g = -gradient; both h (conjugate direction) and xi start out
     as the steepest-descent direction */
  for (j=1;j<=n;j++) {
    g[j] = -xi[j];
    xi[j]=h[j]=g[j];
  }

  for ( *its = 1 ; *its <= itmax ; *its = *its + 1 ) {
    for ( gg = 0.0 , j = 1 ; j <= n ; j ++ )
      gg += g[j]*g[j];          /* find the magnitude of the old gradient */
    if ( MACOPTVERBOSE > 0 )
      printf ( "mac_it %d of %d : gg = %6.3g tol = %6.3g: ", *its , itmax , gg , tol ) ;
    /* do a pseudo line minimization */
    if ( end_if_small_grad && gg <= tol ) {
      FREEALL
      if ( MACOPTVERBOSE > 0 ) printf ("\n");
      return;
    }
    step = maclinmin ( p , xi , n , dfunc , dfunc_arg , pt , gx , gy ) ;
    if ( MACOPTVERBOSE > 1 ) printf (" (step %9.5g)",step);
    if ( MACOPTVERBOSE > 0 ) printf ("\n");
    if ( end_if_small_step && step <= tol ) {
      FREEALL
      return;
    }
    /* Method: 
       evaluate gradient at a sequence of points and calculate the inner 
       product with the line search direction. Continue until a 
       bracketing is achieved ( i.e a change in sign ). */
    /* if there is a step length criterion, look at step size... */

    /* if we are feeling rich, evaluate the gradient at the new `minimum' */
    /* alternatively, linmin constructs this gradient by linear combination 
       of the last two evaluations and leaves it in xi */
    if ( rich ) {
      (*dfunc)(p,xi,dfunc_arg);
    }
    dgg=0.0;
    for (j=1;j<=n;j++) {
      dgg += (xi[j]+g[j])*xi[j];  /* (new grad - old grad) . new grad,
                                     since g holds minus the old gradient */
    }
    gam=dgg/gg;                   /* Polak-Ribiere-style beta coefficient */
    for (j=1;j<=n;j++) {
      g[j] = -xi[j];                /* g stores (-) the most recent gradient */
      xi[j]=h[j]=g[j]+gam*h[j];     /* h stores xi, the current line direction */
    }
  }
  fprintf(stderr,"Reached iteration limit in MACOPT; continuing.\n");
  FREEALL
  return;
} /* NB this leaves the best value of p in the p vector, but 
     the function has not been evaluated there if rich=0     */
#undef EPS
#undef FREEALL

#define G1 2.0    /* bracketing: factor by which the trial step grows */
#define G2 1.25   /* factor applied to the accepted step to seed the next search */
#define G3 1.5    /* bracketing: factor by which the trial step shrinks */
#define MAXITS 30 /* maximum bracketing probes per line search */

/* Line search along direction xi from p.  Brackets the zero of
   (gradient . xi) by growing (G1) or shrinking (G3) the trial step until the
   inner product changes sign, then linearly interpolates between the last
   two probes.  Updates p in place, returns the mean absolute per-coordinate
   step, and leaves an interpolated gradient estimate in xi (NB not like
   NR's linmin).
   NOTE(review): `lastx' (the adaptive initial step) is function-static, so
   this routine is not reentrant/thread-safe and the step size persists
   across separate macopt calls. */
double maclinmin ( double *p , double *xi , int n , 
 void   (*dfunc)(double *,double *, void *), /* evaluates the gradient */
 void   *arg , double *pt , double *gx , double *gy )
{
  static double lastx = 0.01 ; /* 1.0 might make general sense, (cf N.R.)
				  but the best setting of all is to have 
				  a prior idea of the eigenvalues. If 
				  the objective function is equal to sum of N
				  terms then set this to 1/N, for example 
				  Err on the small side to be conservative. */
  double x , y ;
  double s , t , m ;
  int    its = 1 , i ;
  double step , tmpd ;

  x = lastx ;
  s = macprod ( p , xi , pt , gx , x , n , dfunc , arg ) ;

  /* at x=0, the gradient (uphill) satisfies s < 0 */
  if ( s < 0 )  {  /* we need to go further */
    do {
      y = x * G1 ;
      t = macprod ( p , xi , pt , gy , y , n , dfunc , arg ) ;
      if ( MACOPTVERBOSE > 1 )
	printf ("s = %6.3g: t = %6.3g; x = %6.3g y = %6.3g\n",s, t , x , y );
      if ( t >= 0 ) break ;   /* sign change: the zero is bracketed in [x,y] */
      x = y ; s = t ; for ( i = 1 ; i <= n ; i ++ ) gx[i] = gy[i] ;
    }
    while ( its++ < MAXITS ) ;
  } else if ( s > 0 ) { /* need to step back inside interval */
    do {
      y = x / G3 ;
      t = macprod ( p , xi , pt , gy , y , n , dfunc , arg ) ;
      if ( MACOPTVERBOSE > 1 )
	printf ("s = %6.3g: t = %6.3g; x = %6.3g y = %6.3g\n",s, t , x , y );
      if ( t <= 0 ) break ;   /* sign change: the zero is bracketed in [y,x] */
      x = y ; s = t ; for ( i = 1 ; i <= n ; i ++ ) gx[i] = gy[i] ;
    } while ( its++ < MAXITS ) ;
  } else { /* hole in one s = 0.0 */
    t = 1.0 ; y = x;
  }
  if ( its >= MAXITS ) {/* this can happen where the function goes \_ and doesn't buck up again */
    fprintf (stderr, "Warning! maclinmin overran\n" );
    /* NOTE(review): this diagnostic call writes the gradient at step 0 into
       gy, which the interpolation below then reads -- this looks like an
       unintended clobber; confirm against the author's original. */
    fprintf (stderr, "- inner product at 0 = %9.4g\n" ,
	     macprod ( p , xi , pt , gy , 0.0 , n , dfunc , arg ) ) ;
  }
  /*  linear interpolate between the last two */
  /* each endpoint is weighted by the other endpoint's |inner product|, i.e.
     m below is where the linear model of (grad . xi) crosses zero */
  if ( s < 0.0 ) s = - s ;
  if ( t < 0.0 ) t = - t ;
  m = ( s + t ) ;
  s /= m ; t /= m ;

  m =  s * y + t * x ;
  /* evaluate the step length, not that it necessarily means anything */
  for ( step = 0.0 , i = 1 ; i <= n ; i ++ ) {
    tmpd = m * xi[i] ;
    p[i] += tmpd ;
    step += fabs ( tmpd ) ;
    xi[i] = s * gy[i] + t * gx[i] ;/* send back the estimated gradient in xi (NB not like linmin) */
  }
  lastx = m * G2 ;   /* adapt the initial step for the next line search */

  return ( step / (double) ( n ) ) ;
}
#undef G2
#undef G1
#undef G3
#undef MAXITS

double macprod (
 double *p , double *xi , double *pt , double *gx , double x , int n ,
 void   (*dfunc)(double *,double *, void *),
 void   *arg
) { /* finds pt = p + x xi and gets gx there, 
       returning gx . xi */
  int i;
  double s = 0.0 ;
  for ( i = 1 ; i <= n ; i ++ )
    pt[i] = p[i] + x * xi[i] ;

  dfunc( pt , gx , arg ) ;
  for ( i = 1 ; i <= n ; i ++ )
    s += gx[i] * xi[i] ;
  return s ;
}

/*    Explanation of the macopt code 
      ------------------------------

Over the last year or two I have realised that the conjugate gradient 
code in the Numerical Recipes book is very poorly written. 

* In their algorithm is that the initial step size is always `1', even 
though this may be a very inappropriate step size. If this initial step 
size is too big then the routine mnbrak makes the next step equal to 
`-1.6': NB, this is in the wrong direction! And it is an even bigger step 
than the first one, so it is likely to give even worse numerical problems. 
Imagine, for the sake of discussion, that the true minimum is at a step 
size of about 0.001 on each line search. Then the Numerical Recipes 
algorithm will waste a lot of time creating silly guesses -- about 
log_2 1000 of them, in fact. 
So about 10 unnecessary function evaluationsare done on every line search, whereas if the initial step size were 0.001, then only a few function evaluations would be needed to locate the minimum. * Another criticism of the NR code is that they do not use gradient information in the line search. They mention that it is possible to use gradient information, but their example code does not use it. In fact, with gradient information, it is easier to find the line minimum, because you can bracket the minimum with only two gradient evaluations. * Finally, it is not necessary to locate the line minimum as accuratelyas linmin does. I have written my own conjugate gradient algorithm that attempts to improve on the NR code. My algorithm is called macopt. It has the following properties:	1) an adaptive step size is used for the initial step of the line 		search	2) gradients are used in the line search	3) once the minimum has been bracketed, the line search 		terminates immediately, and a new line search commences		using interpolation to estimate the location of the minimum 		and the value of the gradient there. 	4) typically, when the routine has adapted its step size, two 		gradient evaluations per line minimization are performed.I find that this algorithm sometimes is ten times faster than the NR code. The Xerion group at University of Toronto have also written their own conjugate gradient optimizers, and they say that they have adaptive conjugate gradient optimizers that only need one gradient evaluation per line search!You can find macopt on wol in ~mackay/ansi/cg.cIf you also get test_mac.c and test_function.c and nrutil.c and r.c and r.h and mynr.hyou will be able to compile a demonstration program called test_mac which uses macopt to minimize a quadratic function. *//*<!-- hhmts start -->Last modified: Tue Aug  6 17:15:42 1996<!-- hhmts end -->*/

/* NOTE(review): trailing lines removed here were keyboard-shortcut help
   (copy / search / full-screen / font-size) from the hosting web page's
   viewer UI, not part of the source file. */