📄 cgminimizer.java

📁 Standord Classifier实现了一个基于Java的最大熵分类器。用于模式识别
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
	ok1 = ((a-u1)*(u1-b) > 0.0 && dx*d1 <= 0.0);	ok2 = ((a-u2)*(u2-b) > 0.0 && dx*d2 <= 0.0);	olde = e;	e=d;	if (ok1 || ok2) {	  if (ok1 && ok2)	    d = (fabs(d1) < fabs(d2) ? d1 : d2);	  else if (ok1)	    d = d1;	  else	    d = d2;	  if (fabs(d) <= fabs(0.5*olde)) {	    u = x+d;	    if (u-a < tol2 || b-u < tol2)	      d = sign(tol1, xm-x);	  } else {	    e = (dx >= 0.0 ? a-x : b-x);	    d = 0.5*e;	  }	} else {	  e = (dx >= 0.0 ? a-x : b-x);	  d = 0.5*e;	}      } else {	e = (dx >= 0.0 ? a-x : b-x);	d = 0.5*e;      }      if (fabs(d) >= tol1) {	u = x+d;	fu = function.valueAt(u);      } else {	u = x+sign(tol1,d);	fu = function.valueAt(u);	if (fu > fx) {	  if (dbVerbose)	    System.err.println("dbrent returning because derivative is broken");	  return x;	}      }      du = function.derivativeAt(u);      if (fu <= fx) {	if (u >= x)	  a=x;	else	  b=x;	v=w;	fv=fw;	dv=dw;	w=x;	fw=fx;	dw=dx;	x=u;	fx=fu;	dx=du;      } else {	if (u < x)	  a=u;	else	  b=u;	if (fu <= fw || w == x) {	  v=w;	  fv=fw;	  dv=dw;	  w=u;	  fw=fu;	  dw=du;	} else if (fu < fv || v == x || v == w) {	  v=u;	  fv=fu;	  dv=du;	}      }    }    // dan's addition:    if (fx < function.valueAt(0.0))      return x;    System.err.println("Warning: exiting dbrent because ITER exceeded!");    return 0.0;  }  private class Triple {    public double a;    public double b;    public double c;    public Triple(double a, double b, double c) {      this.a = a;      this.b = b;      this.c = c;    }  }  //public double lastXx = 1.0;  double[] lineMinimize(DiffFunction function, double[] initial, double[] direction) {    // make a 1-dim function along the direction line    // THIS IS A HACK (but it's the NRiC peoples' hack)    OneDimDiffFunction oneDim = new OneDimDiffFunction(function, initial, direction);    // do a 1-dim line min on this function    //Double Ax = new Double(0.0);    //Double Xx = new Double(1.0);    //Double Bx = new Double(0.0);    // bracket the extreme pt    double guess = 1.0;    guess = 0.01;    //System.err.println("Current "+oneDim.valueAt(0)+" nudge "+(oneDim.smallestZeroPositiveLocation()*1e-2)+" "+oneDim.valueAt(oneDim.smallestZeroPositiveLocation()*1e-5));    if (! silent) System.err.print("[");    Triple bracketing = null;    bracketing = mnbrak(new Triple(0,guess,0), oneDim);    if (! silent) System.err.print("]");    double ax = bracketing.a;    double xx = bracketing.b;    double bx = bracketing.c;    //lastXx = xx;    // CHECK FOR END OF WORLD    if (!(ax <= xx && xx <= bx) &&	!(bx <= xx && xx <= ax))      System.err.println("Bad bracket order!");    if (verbose) {      System.err.println("Bracketing found: "+ax+" "+xx+" "+bx);      System.err.println("Bracketing found: "+oneDim.valueAt(ax)+" "+oneDim.valueAt(xx)+" "+oneDim.valueAt(bx));    //System.err.println("Bracketing found: "+arrayToString(oneDim.vectorOf(ax),3)+" "+arrayToString(oneDim.vectorOf(xx),3)+" "+arrayToString(oneDim.vectorOf(bx),3));    }    // find the extreme pt    if (! silent) System.err.print("<");    double xmin = dbrent(oneDim, ax, xx, bx);    if (! silent) System.err.print(">");    // return the full vector    //System.err.println("Went "+xmin+" during lineMinimize");    return oneDim.vectorOf(xmin);  }  public double[] minimize(Function function, double functionTolerance, double[] initial) {    // check for derivatives    if (! (function instanceof DiffFunction))      throw new UnsupportedOperationException();    DiffFunction dfunction = (DiffFunction)function;    int dimension = dfunction.domainDimension();    //lastXx = 1.0;        // evaluate function    double fp = dfunction.valueAt(initial);    if (verbose)      System.err.println("Initial: "+fp);    double[] xi = copyArray(dfunction.derivativeAt(initial));    if (verbose) {      System.err.println("Initial at: "+arrayToString(initial,numToPrint));      System.err.println("Initial deriv: "+arrayToString(xi,numToPrint));    }        // make some vectors    double[] g = new double[dimension];    double[] h = new double[dimension];    double[] p = new double[dimension];    for (int j=0; j<dimension; j++) {      g[j] = -1.0*xi[j];      xi[j] = g[j];      h[j] = g[j];      p[j] = initial[j];    }    // iterations    boolean simpleGDStep = false;    for (int iterations=1; iterations<ITMAX; iterations++) {      if (! silent) System.err.print("Iter "+iterations+" ");      // do a line min along descent direction      //System.err.println("Minimizing from ("+p[0]+","+p[1]+") along ("+xi[0]+","+xi[1]+")\n");      if (verbose)	System.err.println("Minimizing along "+arrayToString(xi,numToPrint));      //System.err.println("Current is "+fp);      double[] p2 = lineMinimize(dfunction,p,xi);      double fp2 = dfunction.valueAt(p2);      //System.err.println("Result is "+fp2+" (from "+fp+") at ("+p2[0]+","+p2[1]+")\n");      if (verbose) {	System.err.println("Result is "+fp2+" after "+iterations);	System.err.println("Result at "+arrayToString(p2,numToPrint));      }      //System.err.print(fp2+"|"+(int)(Math.log((fabs(fp2-fp)+1e-100)/(fabs(fp)+fabs(fp2)+1e-100))/Math.log(10)));      if (! silent) System.err.println(" "+fp2);      if (monitor != null) {	double monitorReturn = monitor.valueAt(p2);	if (monitorReturn < functionTolerance)	  return p2;      }      // check convergence      if (2.0*fabs(fp2-fp) <= functionTolerance*(fabs(fp2)+fabs(fp)+EPS)) {	// convergence	if (!checkSimpleGDConvergence || simpleGDStep || simpleGD) {	  return p2;	}	simpleGDStep = true;	//System.err.println("Switched to GD for a step.");      } else {	//if (!simpleGD)	//System.err.println("Switching to CGD.");	simpleGDStep = false;      }      // shift variables      for (int j=0; j<dimension; j++) {	xi[j] = p2[j] - p[j];	p[j] = p2[j];      }      fp = fp2;      // find the new gradient      xi = copyArray(dfunction.derivativeAt(p));      //System.err.print("mx "+arrayMax(xi)+" mn "+arrayMin(xi));      if (!simpleGDStep && !simpleGD && (iterations % resetFrequency != 0)) {	// do the magic -- part i	// (calculate some dot products we'll need)	double dgg = 0.0;	double gg = 0.0;	for (int j=0; j<dimension; j++) {	  // g dot g	  gg += g[j]*g[j];	  // grad dot grad	  // FR method is: 	  // dgg += x[j]*x[j];	  // PR method is:	  dgg += (xi[j]+g[j])*xi[j];	}		// check for miraculous convergence	if (gg == 0.0) {	  return p;	}		// magic part ii	// (update the sequence in a way that tries to preserve conjugacy)	double gam = dgg/gg;	for (int j=0; j<dimension; j++) {	  g[j] = -1.0*xi[j];	  h[j] = g[j]+gam*h[j];	  xi[j] = h[j];	}      } else {	// miraculous simpleGD convergence	double xixi = 0.0;	for (int j=0; j<dimension; j++) {	  xixi += xi[j]*xi[j];	}	// reset cgd	for (int j=0; j<dimension; j++) {	  g[j] = -1.0*xi[j];	  xi[j] = g[j];	  h[j] = g[j];	}	if (xixi == 0.0) {	  return p;	}      }    }    // too many iterations    System.err.println("Warning: exiting minimize because ITER exceeded!");    return p;      }  /**   * Basic constructor, use this.   *   */  public CGMinimizer() {    silent = true;  }  /**   * Pass in <code>false</code> to get per-iteration progress reports   * (to stderr).   *   * @param silent a <code>boolean</code> value   */  public CGMinimizer(boolean silent) {    this.silent = silent;  }  /**   * Perform minimization with monitoring.  After each iteration,    * monitor.valueAt(x) gets called, with the double array <code>x</code>    * being that iteration's ending point.  A return <code>&lt;   * tol</code> forces convergence (terminates the CG procedure).   * Specially for Kristina.   *   * @param monitor a <code>Function</code> value   */  public CGMinimizer(Function monitor) {    this();    this.monitor = monitor;  }}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -