📄 gridsearch.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * GridSearch.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand */package weka.classifiers.meta;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.RandomizableSingleClassifierEnhancer;import weka.classifiers.functions.LinearRegression;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.Debug;import weka.core.Instance;import weka.core.Instances;import weka.core.MathematicalExpression;import weka.core.Option;import weka.core.OptionHandler;import weka.core.PropertyPath;import weka.core.SelectedTag;import weka.core.SerializedObject;import weka.core.Tag;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.supervised.attribute.PLSFilter;import weka.filters.unsupervised.attribute.MathExpression;import weka.filters.unsupervised.attribute.NumericCleaner;import weka.filters.unsupervised.instance.Resample;import java.beans.PropertyDescriptor;import java.io.File;import java.io.Serializable;import java.util.Collections;import java.util.Comparator;import java.util.Enumeration;import java.util.HashMap;import java.util.Hashtable;import java.util.Iterator;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Performs a 
grid search of parameter pairs for a classifier (Y-axis, default is LinearRegression with the "Ridge" parameter) and the PLSFilter (X-axis, "# of Components") and chooses the best pair found for the actual predicting.<br/> * <br/> * The initial grid is worked on with 2-fold CV to determine the values of the parameter pairs for the selected type of evaluation (e.g., accuracy). The best point in the grid is then taken and a 10-fold CV is performed with the adjacent parameter pairs. If a better pair is found, then this will act as new center and another 10-fold CV will be performed (kind of hill-climbing). This process is repeated until no better pair is found or the best pair is on the border of the grid.<br/> * In case the best pair is on the border, one can let GridSearch automatically extend the grid and continue the search. Check out the properties 'gridIsExtendable' (option '-extend-grid') and 'maxGridExtensions' (option '-max-grid-extensions <num>').<br/> * <br/> * GridSearch can handle doubles, integers (values are just cast to int) and booleans (0 is false, otherwise true). float, char and long are supported as well.<br/> * <br/> * The best filter/classifier setup can be accessed after the buildClassifier call via the getBestFilter/getBestClassifier methods.<br/> * Note on the implementation: after the data has been passed through the filter, a default NumericCleaner filter is applied to the data in order to avoid numbers that are getting too small and might produce NaNs in other schemes. 
* <p/> <!-- globalinfo-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -E <CC|RMSE|RRSE|MAE|RAE|COMB|ACC> * Determines the parameter used for evaluation: * CC = Correlation coefficient * RMSE = Root mean squared error * RRSE = Root relative squared error * MAE = Mean absolute error * RAE = Root absolute error * COMB = Combined = (1-abs(CC)) + RRSE + RAE * ACC = Accuracy * (default: CC)</pre> * * <pre> -y-property <option> * The Y option to test (without leading dash). * (default: classifier.ridge)</pre> * * <pre> -y-min <num> * The minimum for Y. * (default: -10)</pre> * * <pre> -y-max <num> * The maximum for Y. * (default: +5)</pre> * * <pre> -y-step <num> * The step size for Y. * (default: 1)</pre> * * <pre> -y-base <num> * The base for Y. * (default: 10)</pre> * * <pre> -y-expression <expr> * The expression for Y. * Available parameters: * BASE * FROM * TO * STEP * I - the current iteration value * (from 'FROM' to 'TO' with stepsize 'STEP') * (default: 'pow(BASE,I)')</pre> * * <pre> -filter <filter specification> * The filter to use (on X axis). Full classname of filter to include, * followed by scheme options. * (default: weka.filters.supervised.attribute.PLSFilter)</pre> * * <pre> -x-property <option> * The X option to test (without leading dash). * (default: filter.numComponents)</pre> * * <pre> -x-min <num> * The minimum for X. * (default: +5)</pre> * * <pre> -x-max <num> * The maximum for X. * (default: +20)</pre> * * <pre> -x-step <num> * The step size for X. * (default: 1)</pre> * * <pre> -x-base <num> * The base for X. * (default: 10)</pre> * * <pre> -x-expression <expr> * The expression for the X value. * Available parameters: * BASE * MIN * MAX * STEP * I - the current iteration value * (from 'FROM' to 'TO' with stepsize 'STEP') * (default: 'pow(BASE,I)')</pre> * * <pre> -extend-grid * Whether the grid can be extended. 
* (default: no)</pre> * * <pre> -max-grid-extensions <num> * The maximum number of grid extensions (-1 is unlimited). * (default: 3)</pre> * * <pre> -sample-size <num> * The size (in percent) of the sample to search the inital grid with. * (default: 100)</pre> * * <pre> -traversal <ROW-WISE|COLUMN-WISE> * The type of traversal for the grid. * (default: COLUMN-WISE)</pre> * * <pre> -log-file <filename> * The log file to log the messages to. * (default: none)</pre> * * <pre> -S <num> * Random number seed. * (default 1)</pre> * * <pre> -D * If set, classifier is run in debug mode and * may output additional info to the console</pre> * * <pre> -W * Full name of base classifier. * (default: weka.classifiers.functions.LinearRegression)</pre> * * <pre> * Options specific to classifier weka.classifiers.functions.LinearRegression: * </pre> * * <pre> -D * Produce debugging output. * (default no debugging output)</pre> * * <pre> -S <number of selection method> * Set the attribute selection method to use. 1 = None, 2 = Greedy. * (default 0 = M5' method)</pre> * * <pre> -C * Do not try to eliminate colinear attributes. * </pre> * * <pre> -R <double> * Set ridge parameter (default 1.0e-8). * </pre> * * <pre> * Options specific to filter weka.filters.supervised.attribute.PLSFilter ('-filter'): * </pre> * * <pre> -D * Turns on output of debugging information.</pre> * * <pre> -C <num> * The number of components to compute. * (default: 20)</pre> * * <pre> -U * Updates the class attribute as well. * (default: off)</pre> * * <pre> -M * Turns replacing of missing values on. * (default: off)</pre> * * <pre> -A <SIMPLS|PLS1> * The algorithm to use. * (default: PLS1)</pre> * * <pre> -P <none|center|standardize> * The type of preprocessing that is applied to the data. 
* (default: center)</pre> * <!-- options-end --> * * Examples: * <ul> * <li> * <b>Optimizing SMO with RBFKernel (C and gamma)</b> * <ul> * <li>Set the evaluation to <i>Accuracy</i>.</li> * <li>Set the filter to <code>weka.filters.AllFilter</code> since we * don't need any special data processing and we don't optimize the * filter in this case (data gets always passed through filter!).</li> * <li>Set <code>weka.classifiers.functions.SMO</code> as classifier * with <code>weka.classifiers.functions.supportVector.RBFKernel</code> * as kernel. * </li> * <li>Set the XProperty to "classifier.c", XMin to "1", XMax to "16", * XStep to "1" and the XExpression to "I". This will test the "C" * parameter of SMO for the values from 1 to 16.</li> * <li>Set the YProperty to "classifier.kernel.gamma", YMin to "-5", * YMax to "2", YStep to "1", YBase to "10" and YExpression to * "pow(BASE,I)". This will test the gamma of the RBFKernel with the * values 10^-5, 10^-4,..,10^2.</li> * </ul> * </li> * <li> * <b>Optimizing PLSFilter with LinearRegression (# of components and ridge) - default setup</b> * <ul> * <li>Set the evaluation to <i>Correlation coefficient</i>.</li> * <li>Set the filter to <code>weka.filters.supervised.attribute.PLSFilter</code>.</li> * <li>Set <code>weka.classifiers.functions.LinearRegression</code> as * classifier and use no attribute selection and no elimination of * colinear attributes.</li> * <li>Set the XProperty to "filter.numComponents", XMin to "5", XMax * to "20" (this depends heavily on your dataset, should be no more * than the number of attributes!), XStep to "1" and XExpression to * "I". This will test the number of components the PLSFilter will * produce from 5 to 20.</li> * <li>Set the YProperty to "classifier.ridge", YMin to "-10", YMax to * "5", YStep to "1" and YExpression to "pow(BASE,I)". 
This will * try ridge parameters from 10^-10 to 10^5.</li> * </ul> * </li> * </ul> * * General notes: * <ul> * <li>Turn the <i>debug</i> flag on in order to see some progress output in the * console</li> * <li>If you want to view the fitness landscape that GridSearch explores, * select a <i>log file</i>. This log will then contain Gnuplot data and * script block for viewing the landscape. Just copy paste those blocks * into files named accordingly and run Gnuplot with them.</li> * </ul> * * @author Bernhard Pfahringer (bernhard at cs dot waikato dot ac dot nz) * @author Geoff Holmes (geoff at cs dot waikato dot ac dot nz) * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.3 $ * @see PLSFilter * @see LinearRegression * @see NumericCleaner */public class GridSearch extends RandomizableSingleClassifierEnhancer implements AdditionalMeasureProducer { /** * a serializable version of Point2D.Double * * @see java.awt.geom.Point2D.Double */ protected class PointDouble extends java.awt.geom.Point2D.Double implements Serializable { /** for serialization */ private static final long serialVersionUID = 7151661776161898119L; /** * the default constructor * * @param x the x value of the point * @param y the y value of the point */ public PointDouble(double x, double y) { super(x, y); } /** * returns a string representation of the Point * * @return the point as string */ public String toString() { return super.toString().replaceAll(".*\\[", "["); } } /** * a serializable version of Point * * @see java.awt.Point */ protected class PointInt extends java.awt.Point implements Serializable { /** for serialization */ private static final long serialVersionUID = -5900415163698021618L; /** * the default constructor * * @param x the x value of the point * @param y the y value of the point */ public PointInt(int x, int y) { super(x, y); } /** * returns a string representation of the Point * * @return the point as string */ public String toString() { return 
super.toString().replaceAll(".*\\[", "["); } } /** * for generating the parameter pairs in a grid */ protected class Grid implements Serializable { /** for serialization */ private static final long serialVersionUID = 7290732613611243139L; /** the minimum on the X axis */ protected double m_MinX; /** the maximum on the X axis */ protected double m_MaxX; /** the step size for the X axis */ protected double m_StepX; /** the label for the X axis */ protected String m_LabelX; /** the minimum on the Y axis */ protected double m_MinY; /** the maximum on the Y axis */ protected double m_MaxY; /** the step size for the Y axis */ protected double m_StepY; /** the label for the Y axis */ protected String m_LabelY; /** the number of points on the X axis */ protected int m_Width; /** the number of points on the Y axis */ protected int m_Height; /** * initializes the grid * * @param minX the minimum on the X axis * @param maxX the maximum on the X axis * @param stepX the step size for the X axis * @param minY the minimum on the Y axis * @param maxY the maximum on the Y axis * @param stepY the step size for the Y axis */ public Grid(double minX, double maxX, double stepX, double minY, double maxY, double stepY) { this(minX, maxX, stepX, "", minY, maxY, stepY, ""); } /** * initializes the grid * * @param minX the minimum on the X axis * @param maxX the maximum on the X axis * @param stepX the step size for the X axis * @param labelX the label for the X axis * @param minY the minimum on the Y axis * @param maxY the maximum on the Y axis * @param stepY the step size for the Y axis * @param labelY the label for the Y axis */ public Grid(double minX, double maxX, double stepX, String labelX, double minY, double maxY, double stepY, String labelY) { super(); m_MinX = minX; m_MaxX = maxX; m_StepX = stepX; m_LabelX = labelX; m_MinY = minY; m_MaxY = maxY; m_StepY = stepY; m_LabelY = labelY; m_Height = (int) StrictMath.round((m_MaxY - m_MinY) / m_StepY) + 1; m_Width = (int) 
StrictMath.round((m_MaxX - m_MinX) / m_StepX) + 1; } /** * returns the left border * * @return the left border */ public double getMinX() { return m_MinX; } /** * returns the right border *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -