gridsearch.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclip」· Java 代码 · 共 2,292 行 · 第 1/5 页
JAVA
2,292 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * GridSearch.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand */package weka.classifiers.meta;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.RandomizableSingleClassifierEnhancer;import weka.classifiers.functions.LinearRegression;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.Debug;import weka.core.Instance;import weka.core.Instances;import weka.core.MathematicalExpression;import weka.core.Option;import weka.core.OptionHandler;import weka.core.PropertyPath;import weka.core.SelectedTag;import weka.core.SerializedObject;import weka.core.Tag;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.supervised.attribute.PLSFilter;import weka.filters.unsupervised.attribute.MathExpression;import weka.filters.unsupervised.attribute.NumericCleaner;import weka.filters.unsupervised.instance.Resample;import java.beans.PropertyDescriptor;import java.io.File;import java.io.Serializable;import java.util.Collections;import java.util.Comparator;import java.util.Enumeration;import java.util.HashMap;import java.util.Hashtable;import java.util.Iterator;import java.util.Random;import java.util.Vector;/** <!-- 
globalinfo-start --> * Performs a grid search of parameter pairs for a classifier (Y-axis, default is LinearRegression with the "Ridge" parameter) and the PLSFilter (X-axis, "# of Components") and chooses the best pair found for the actual prediction.<br/> * <br/> * The initial grid is worked on with 2-fold CV to determine the values of the parameter pairs for the selected type of evaluation (e.g., accuracy). The best point in the grid is then taken and a 10-fold CV is performed with the adjacent parameter pairs. If a better pair is found, then this will act as the new center and another 10-fold CV will be performed (kind of hill-climbing). This process is repeated until no better pair is found or the best pair is on the border of the grid.<br/> * In case the best pair is on the border, one can let GridSearch automatically extend the grid and continue the search. Check out the properties 'gridIsExtendable' (option '-extend-grid') and 'maxGridExtensions' (option '-max-grid-extensions &lt;num&gt;').<br/> * <br/> * GridSearch can handle doubles, integers (values are just cast to int) and booleans (0 is false, otherwise true). float, char and long are supported as well.<br/> * <br/> * The best filter/classifier setup can be accessed after the buildClassifier call via the getBestFilter/getBestClassifier methods.<br/> * Note on the implementation: after the data has been passed through the filter, a default NumericCleaner filter is applied to the data in order to avoid numbers that are getting too small and might produce NaNs in other schemes. 
* <p/> <!-- globalinfo-end --> *  <!-- options-start --> * Valid options are: <p/> *  * <pre> -E &lt;CC|RMSE|RRSE|MAE|RAE|COMB|ACC&gt; *  Determines the parameter used for evaluation: *  CC = Correlation coefficient *  RMSE = Root mean squared error *  RRSE = Root relative squared error *  MAE = Mean absolute error *  RAE = Root absolute error *  COMB = Combined = (1-abs(CC)) + RRSE + RAE *  ACC = Accuracy *  (default: CC)</pre> *  * <pre> -y-property &lt;option&gt; *  The Y option to test (without leading dash). *  (default: classifier.ridge)</pre> *  * <pre> -y-min &lt;num&gt; *  The minimum for Y. *  (default: -10)</pre> *  * <pre> -y-max &lt;num&gt; *  The maximum for Y. *  (default: +5)</pre> *  * <pre> -y-step &lt;num&gt; *  The step size for Y. *  (default: 1)</pre> *  * <pre> -y-base &lt;num&gt; *  The base for Y. *  (default: 10)</pre> *  * <pre> -y-expression &lt;expr&gt; *  The expression for Y. *  Available parameters: *   BASE *   FROM *   TO *   STEP *   I - the current iteration value *   (from 'FROM' to 'TO' with stepsize 'STEP') *  (default: 'pow(BASE,I)')</pre> *  * <pre> -filter &lt;filter specification&gt; *  The filter to use (on X axis). Full classname of filter to include,  *  followed by scheme options. *  (default: weka.filters.supervised.attribute.PLSFilter)</pre> *  * <pre> -x-property &lt;option&gt; *  The X option to test (without leading dash). *  (default: filter.numComponents)</pre> *  * <pre> -x-min &lt;num&gt; *  The minimum for X. *  (default: +5)</pre> *  * <pre> -x-max &lt;num&gt; *  The maximum for X. *  (default: +20)</pre> *  * <pre> -x-step &lt;num&gt; *  The step size for X. *  (default: 1)</pre> *  * <pre> -x-base &lt;num&gt; *  The base for X. *  (default: 10)</pre> *  * <pre> -x-expression &lt;expr&gt; *  The expression for the X value. 
*  Available parameters: *   BASE *   MIN *   MAX *   STEP *   I - the current iteration value *   (from 'FROM' to 'TO' with stepsize 'STEP') *  (default: 'pow(BASE,I)')</pre> *  * <pre> -extend-grid *  Whether the grid can be extended. *  (default: no)</pre> *  * <pre> -max-grid-extensions &lt;num&gt; *  The maximum number of grid extensions (-1 is unlimited). *  (default: 3)</pre> *  * <pre> -sample-size &lt;num&gt; *  The size (in percent) of the sample to search the initial grid with. *  (default: 100)</pre> *  * <pre> -traversal &lt;ROW-WISE|COLUMN-WISE&gt; *  The type of traversal for the grid. *  (default: COLUMN-WISE)</pre> *  * <pre> -log-file &lt;filename&gt; *  The log file to log the messages to. *  (default: none)</pre> *  * <pre> -S &lt;num&gt; *  Random number seed. *  (default 1)</pre> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  * <pre> -W *  Full name of base classifier. *  (default: weka.classifiers.functions.LinearRegression)</pre> *  * <pre>  * Options specific to classifier weka.classifiers.functions.LinearRegression: * </pre> *  * <pre> -D *  Produce debugging output. *  (default no debugging output)</pre> *  * <pre> -S &lt;number of selection method&gt; *  Set the attribute selection method to use. 1 = None, 2 = Greedy. *  (default 0 = M5' method)</pre> *  * <pre> -C *  Do not try to eliminate colinear attributes. * </pre> *  * <pre> -R &lt;double&gt; *  Set ridge parameter (default 1.0e-8). * </pre> *  * <pre>  * Options specific to filter weka.filters.supervised.attribute.PLSFilter ('-filter'): * </pre> *  * <pre> -D *  Turns on output of debugging information.</pre> *  * <pre> -C &lt;num&gt; *  The number of components to compute. *  (default: 20)</pre> *  * <pre> -U *  Updates the class attribute as well. *  (default: off)</pre> *  * <pre> -M *  Turns replacing of missing values on. *  (default: off)</pre> *  * <pre> -A &lt;SIMPLS|PLS1&gt; *  The algorithm to use. 
*  (default: PLS1)</pre> *  * <pre> -P &lt;none|center|standardize&gt; *  The type of preprocessing that is applied to the data. *  (default: center)</pre> *  <!-- options-end --> * * Examples: * <ul> *   <li> *     <b>Optimizing SMO with RBFKernel (C and gamma)</b> *     <ul> *       <li>Set the evaluation to <i>Accuracy</i>.</li> *       <li>Set the filter to <code>weka.filters.AllFilter</code> since we *           don't need any special data processing and we don't optimize the *           filter in this case (data gets always passed through filter!).</li> *       <li>Set <code>weka.classifiers.functions.SMO</code> as classifier *           with <code>weka.classifiers.functions.supportVector.RBFKernel</code> *           as kernel. *       </li> *       <li>Set the XProperty to "classifier.c", XMin to "1", XMax to "16",  *           XStep to "1" and the XExpression to "I". This will test the "C" *           parameter of SMO for the values from 1 to 16.</li> *       <li>Set the YProperty to "classifier.kernel.gamma", YMin to "-5", *           YMax to "2", YStep to "1" YBase to "10" and YExpression to  *           "pow(BASE,I)". This will test the gamma of the RBFKernel with the *           values 10^-5, 10^-4,..,10^2.</li> *     </ul> *   </li> *   <li> *     <b>Optimizing PLSFilter with LinearRegression (# of components and ridge) - default setup</b> *     <ul> *       <li>Set the evaluation to <i>Correlation coefficient</i>.</li> *       <li>Set the filter to <code>weka.filters.supervised.attribute.PLSFilter</code>.</li> *       <li>Set <code>weka.classifiers.functions.LinearRegression</code> as  *           classifier and use no attribute selection and no elimination of *           colinear attributes.</li> *       <li>Set the XProperty to "filter.numComponents", XMin to "5", XMax  *           to "20" (this depends heavily on your dataset, should be no more *           than the number of attributes!), XStep to "1" and XExpression to *           "I". 
This will test the number of components the PLSFilter will *           produce from 5 to 20.</li> *       <li>Set the YProperty to "classifier.ridge", YMin to "-10", YMax to  *           "5", YStep to "1" and YExpression to "pow(BASE,I)". This will *           try ridge parameters from 10^-10 to 10^5.</li> *     </ul> *   </li> * </ul> *  * General notes: * <ul> *   <li>Turn the <i>debug</i> flag on in order to see some progress output in the *       console</li> *   <li>If you want to view the fitness landscape that GridSearch explores, *       select a <i>log file</i>. This log will then contain Gnuplot data and  *       script block for viewing the landscape. Just copy paste those blocks  *       into files named accordingly and run Gnuplot with them.</li> * </ul> * * @author  Bernhard Pfahringer (bernhard at cs dot waikato dot ac dot nz) * @author  Geoff Holmes (geoff at cs dot waikato dot ac dot nz) * @author  fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.3 $ * @see     PLSFilter * @see     LinearRegression * @see	    NumericCleaner */public class GridSearch  extends RandomizableSingleClassifierEnhancer  implements AdditionalMeasureProducer {  /**   * a serializable version of Point2D.Double   *    * @see java.awt.geom.Point2D.Double   */  protected class PointDouble    extends java.awt.geom.Point2D.Double    implements Serializable {    /** for serialization */    private static final long serialVersionUID = 7151661776161898119L;        /**     * initializes the point with the given coordinates (passed on to the superclass)     *      * @param x		the x value of the point     * @param y		the y value of the point     */    public PointDouble(double x, double y) {      super(x, y);    }        /**     * returns the string representation of the superclass with any text preceding the '[' removed, i.e., the result starts at the bracket     *      * @return the point as string     */    public String toString() {      return super.toString().replaceAll(".*\\[", "[");    }  }  /**   * a serializable version of Point   *    * @see java.awt.Point   */  protected class PointInt    
extends java.awt.Point    implements Serializable {    /** for serialization */    private static final long serialVersionUID = -5900415163698021618L;    /**     * initializes the point with the given coordinates (passed on to the superclass)     *      * @param x		the x value of the point     * @param y		the y value of the point     */    public PointInt(int x, int y) {      super(x, y);    }        /**     * returns the string representation of the superclass with any text preceding the '[' removed, i.e., the result starts at the bracket     *      * @return the point as string     */    public String toString() {      return super.toString().replaceAll(".*\\[", "[");    }  }    /**   * for generating the parameter pairs in a grid   */  protected class Grid    implements Serializable {    /** for serialization */    private static final long serialVersionUID = 7290732613611243139L;        /** the minimum on the X axis */    protected double m_MinX;        /** the maximum on the X axis */    protected double m_MaxX;        /** the step size for the X axis */    protected double m_StepX;    /** the label for the X axis */    protected String m_LabelX;        /** the minimum on the Y axis */    protected double m_MinY;        /** the maximum on the Y axis */    protected double m_MaxY;        /** the step size for the Y axis */    protected double m_StepY;    /** the label for the Y axis */    protected String m_LabelY;        /** the number of points on the X axis */    protected int m_Width;        /** the number of points on the Y axis */    protected int m_Height;        /**     * initializes the grid     *      * @param minX 	the minimum on the X axis     * @param maxX 	the maximum on the X axis     * @param stepX 	the step size for the X axis     * @param minY 	the minimum on the Y axis     * @param maxY 	the maximum on the Y axis     * @param stepY 	the step size for the Y axis     */    public Grid(double minX, double maxX, double stepX, 	        double minY, double maxY, double stepY) {      this(minX, maxX, stepX, "", minY, maxY, stepY, "");    }        /**     * initializes the grid     *      
* @param minX 	the minimum on the X axis     * @param maxX 	the maximum on the X axis     * @param stepX 	the step size for the X axis     * @param labelX	the label for the X axis     * @param minY 	the minimum on the Y axis     * @param maxY 	the maximum on the Y axis     * @param stepY 	the step size for the Y axis     * @param labelY	the label for the Y axis     */    public Grid(double minX, double maxX, double stepX, String labelX,	        double minY, double maxY, double stepY, String labelY) {      super();            m_MinX   = minX;      m_MaxX   = maxX;      m_StepX  = stepX;      m_LabelX = labelX;      m_MinY   = minY;      m_MaxY   = maxY;      m_StepY  = stepY;      m_LabelY = labelY;      m_Height = (int) StrictMath.round((m_MaxY - m_MinY) / m_StepY) + 1;      m_Width  = (int) StrictMath.round((m_MaxX - m_MinX) / m_StepX) + 1;    }        /**     * returns the left border     *      * @return 		the left border     */    public double getMinX() {      return m_MinX;    }        /**     * returns the right border     *
gridsearch.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 gridsearch.java 源码文件，采用 Java 编程语言编写，共 2,292 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?