📄 fcbfsearch.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* * RELEASE INFORMATION (December 27, 2004) * * FCBF algorithm: * Template obtained from Weka * Developed for Weka by Zheng Alan Zhao * December 27, 2004 * * FCBF algorithm is a feature selection method based on Symmetrical Uncertainty Measurement for * relevance redundancy analysis. The details of FCBF algorithm are in: * <!-- technical-plaintext-start --> * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003. 
<!-- technical-plaintext-end --> * * * CONTACT INFORMATION * * For algorithm implementation: * Zheng Zhao: zhaozheng at asu.edu * * For the algorithm: * Lei Yu: leiyu at asu.edu * Huan Liu: hliu at asu.edu * * Data Mining and Machine Learning Lab * Computer Science and Engineering Department * Fulton School of Engineering * Arizona State University * Tempe, AZ 85287 * * FCBFSearch.java * * Copyright (C) 2004 Data Mining and Machine Learning Lab, * Computer Science and Engineering Department, * Fulton School of Engineering, * Arizona State University * */package weka.attributeSelection;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Range;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * FCBF : <br/> * <br/> * Feature selection method based on correlation measureand relevance&redundancy analysis. Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).<br/> * <br/> * For more information see:<br/> * <br/> * Lei Yu, Huan Liu: Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution. In: Proceedings of the Twentieth International Conference on Machine Learning, 856-863, 2003. 
* <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Yu2003, * author = {Lei Yu and Huan Liu}, * booktitle = {Proceedings of the Twentieth International Conference on Machine Learning}, * pages = {856-863}, * publisher = {AAAI Press}, * title = {Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution}, * year = {2003} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -D <create dataset> * Specify Whether the selector generates a new dataset.</pre> * * <pre> -P <start set> * Specify a starting set of attributes. * Eg. 1,3,5-7. * Any starting attributes specified are * ignored during the ranking.</pre> * * <pre> -T <threshold> * Specify a theshold by which attributes * may be discarded from the ranking.</pre> * * <pre> -N <num to select> * Specify number of attributes to select</pre> * <!-- options-end --> * * @author Zheng Zhao: zhaozheng at asu.edu * @version $Revision: 1.6 $ */public class FCBFSearch extends ASSearch implements RankedOutputSearch, StartSetHandler, OptionHandler, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 8209699587428369942L; /** Holds the starting set as an array of attributes */ private int[] m_starting; /** Holds the start set for the search as a range */ private Range m_startRange; /** Holds the ordered list of attributes */ private int[] m_attributeList; /** Holds the list of attribute merit scores */ private double[] m_attributeMerit; /** Data has class attribute---if unsupervised evaluator then no class */ private boolean m_hasClass; /** Class index of the data if supervised evaluator */ private int m_classIndex; /** The number of attribtes */ private int m_numAttribs; /** * A threshold by which to discard attributes---used by the * AttributeSelection module */ private double m_threshold; /** The number of attributes to select. 
-1 indicates that all attributes are to be retained. Has precedence over m_threshold */
  private int m_numToSelect = -1;

  /** Used to compute the number to select */
  private int m_calculatedNumToSelect = -1;

  /*-----------------add begin 2004-11-15 by alan-----------------*/
  /** Used to determine whether we create a new dataset according to the selected features */
  private boolean m_generateOutput = false;

  /** Used to store the reference to the evaluator we use */
  private ASEvaluation m_asEval;

  /** Holds the list of attribute merit scores generated by FCBF */
  private double[][] m_rankedFCBF;

  /** Holds the list of selected features */
  private double[][] m_selectedFeatures;
  /*-----------------add end 2004-11-15 by alan-----------------*/

  /**
   * Returns a string describing this search method. The text ends with the
   * string form of {@link #getTechnicalInformation()}.
   *
   * @return a description of the search suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    // The first fragment ends with a space so that the concatenation reads
    // "correlation measure and relevance..." rather than "measureand".
    return "FCBF : \n\nFeature selection method based on correlation measure "
      + "and relevance&redundancy analysis. "
      + "Use in conjunction with an attribute set evaluator (SymmetricalUncertAttributeEval).\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
* * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Lei Yu and Huan Liu"); result.setValue(Field.TITLE, "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution"); result.setValue(Field.BOOKTITLE, "Proceedings of the Twentieth International Conference on Machine Learning"); result.setValue(Field.YEAR, "2003"); result.setValue(Field.PAGES, "856-863"); result.setValue(Field.PUBLISHER, "AAAI Press"); return result; } /** * Constructor */ public FCBFSearch () { resetOptions(); } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String numToSelectTipText() { return "Specify the number of attributes to retain. The default value " +"(-1) indicates that all attributes are to be retained. Use either " +"this option or a threshold to reduce the attribute set."; } /** * Specify the number of attributes to select from the ranked list. -1 * indicates that all attributes are to be retained. * @param n the number of attributes to retain */ public void setNumToSelect(int n) { m_numToSelect = n; } /** * Gets the number of attributes to be retained. * @return the number of attributes to retain */ public int getNumToSelect() { return m_numToSelect; } /** * Gets the calculated number to select. This might be computed * from a threshold, or if < 0 is set as the number to select then * it is set to the number of attributes in the (transformed) data. 
* @return the calculated number of attributes to select */ public int getCalculatedNumToSelect() { if (m_numToSelect >= 0) { m_calculatedNumToSelect = m_numToSelect; } if (m_selectedFeatures.length>0 && m_selectedFeatures.length<m_calculatedNumToSelect) { m_calculatedNumToSelect = m_selectedFeatures.length; } return m_calculatedNumToSelect; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String thresholdTipText() { return "Set threshold by which attributes can be discarded. Default value " + "results in no attributes being discarded. Use either this option or " +"numToSelect to reduce the attribute set."; } /** * Set the threshold by which the AttributeSelection module can discard * attributes. * @param threshold the threshold. */ public void setThreshold(double threshold) { m_threshold = threshold; } /** * Returns the threshold so that the AttributeSelection module can * discard attributes from the ranking. * @return the threshold */ public double getThreshold() { return m_threshold; } /** * Returns the tip text for this property * @return tip text for this property suitable for * displaying in the explorer/experimenter gui */ public String generateRankingTipText() { return "A constant option. FCBF is capable of generating" +" attribute rankings."; } /** * This is a dummy set method---Ranker is ONLY capable of producing * a ranked list of attributes for attribute evaluators. * @param doRank this parameter is N/A and is ignored */ public void setGenerateRanking(boolean doRank) { } /** * This is a dummy method. Ranker can ONLY be used with attribute * evaluators and as such can only produce a ranked list of attributes * @return true all the time. 
*/
  public boolean getGenerateRanking() {
    return true;
  }

  /**
   * Returns the tip text for the generateDataOutput property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String generateDataOutputTipText() {
    return "Generating new dataset according to the selected features."
      + " ";
  }

  /**
   * Sets the flag that tells the AttributeSelection module whether to
   * create a new dataset from the selected features.
   *
   * @param doGenerate true if a new dataset should be created from the
   * selected features
   */
  public void setGenerateDataOutput(boolean doGenerate) {
    this.m_generateOutput = doGenerate;
  }

  /**
   * Returns the flag that tells the AttributeSelection module whether to
   * create a new dataset from the selected features.
   *
   * @return true if a new dataset should be created from the selected
   * features
   */
  public boolean getGenerateDataOutput() {
    return this.m_generateOutput;
  }

  /**
   * Returns the tip text for the startSet property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String startSetTipText() {
    // Spelling corrected in the user-facing text: "separated list of"
    // (previously "seperated list off").
    return "Specify a set of attributes to ignore. "
      + " When generating the ranking, FCBF will not evaluate the attributes "
      + " in this list. "
      + "This is specified as a comma "
      + "separated list of attribute indexes starting at 1. It can include "
      + "ranges. Eg. 1,2,5-9,17.";
  }

  /**
   * Sets a starting set of attributes for the search. It is the
   * search method's responsibility to report this start set (if any)
   * in its toString() method.
   * @param startSet a string containing a list of attributes (and or ranges),
   * eg. 1,2,6,10-15.
   * @throws Exception if start set can't be set.
*/
  public void setStartSet (String startSet) throws Exception {
    // Hand the range string to the Range object that holds the start set.
    this.m_startRange.setRanges(startSet);
  }

  /**
   * Reports the start set as the range string held by the Range object.
   *
   * @return a list of attributes (and or attribute ranges)
   */
  public String getStartSet () {
    final String ranges = this.m_startRange.getRanges();
    return ranges;
  }

  /**
   * Builds the descriptions of the four command-line options (-D, -P, -T
   * and -N), each declared as taking one argument.
   *
   * @return an enumeration of all the available options.
   **/
  public Enumeration listOptions () {
    final String createDatasetHelp =
      "\tSpecify Whether the selector generates a new dataset.";
    final String startSetHelp =
      "\tSpecify a starting set of attributes.\n"
      + "\t\tEg. 1,3,5-7.\n"
      + "\tAny starting attributes specified are\n"
      + "\tignored during the ranking.";
    final String thresholdHelp =
      "\tSpecify a theshold by which attributes\n"
      + "\tmay be discarded from the ranking.";
    final String numToSelectHelp =
      "\tSpecify number of attributes to select";

    Vector options = new Vector(4);
    options.add(new Option(createDatasetHelp, "D", 1, "-D <create dataset>"));
    options.add(new Option(startSetHelp, "P", 1, "-P <start set>"));
    options.add(new Option(thresholdHelp, "T", 1, "-T <threshold>"));
    options.add(new Option(numToSelectHelp, "N", 1, "-N <num to select>"));

    return options.elements();
  }

  /**
   * Parses a given list of options. <p/>
   * <!-- options-start -->
   * Valid options are: <p/>
   *
   * <pre> -D <create dataset>
   * Specify Whether the selector generates a new dataset.</pre>
   *
   * <pre> -P <start set>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -