decisiontable.java

来自「Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等」· Java 代码 · 共 1,531 行 · 第 1/3 页

JAVA
1,531
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    DecisionTable.java *    Copyright (C) 1999 Mark Hall * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.lazy.IBk;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformation.Type;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.filters.Filter;import weka.filters.unsupervised.attribute.Remove;import java.io.Serializable;import java.util.BitSet;import java.util.Enumeration;import java.util.Hashtable;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for building and using a simple decision table majority classifier.<br/> * <br/> * For more information see: <br/> * <br/> * Ron Kohavi: The Power of Decision Tables. In: 8th European Conference on Machine Learning, 174-189, 1995. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;inproceedings{Kohavi1995, *    author = {Ron Kohavi}, *    booktitle = {8th European Conference on Machine Learning}, *    pages = {174-189}, *    publisher = {Springer}, *    title = {The Power of Decision Tables}, *    year = {1995} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -S &lt;number of non improving nodes&gt; *  Number of fully expanded non improving subsets to consider *  before terminating a best first search. *  Use in conjunction with -B. (Default = 5)</pre> *  * <pre> -X &lt;number of folds&gt; *  Use cross validation to evaluate features. *  Use number of folds = 1 for leave one out CV. *  (Default = leave one out CV)</pre> *  * <pre> -I *  Use nearest neighbour instead of global table majority.</pre> *  * <pre> -R *  Display decision table rules. * </pre> *  <!-- options-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.34 $  */public class DecisionTable   extends Classifier   implements OptionHandler, WeightedInstancesHandler, 	     AdditionalMeasureProducer, TechnicalInformationHandler {    /** for serialization */  static final long serialVersionUID = 2788557078165701326L;    /** The hashtable used to hold training instances */  private Hashtable m_entries;  /** Holds the final feature set */  private int [] m_decisionFeatures;  /** Discretization filter */  private Filter m_disTransform;  /** Filter used to remove columns discarded by feature selection */  private Remove m_delTransform;  /** IB1 used to classify non matching instances rather than majority class */  private IBk m_ibk;    /** Holds the training instances */  private Instances m_theInstances;    /** The number of attributes in the dataset */  private int m_numAttributes;  /** The number of instances in the dataset */  private int m_numInstances;  /** Class is nominal */  private boolean m_classIsNominal;  /** Output debug info */  private boolean m_debug;  /** Use the IBk classifier rather than majority class */  private boolean m_useIBk;  /** Display Rules */  private boolean m_displayRules;  /**    * Maximum number of fully expanded non improving subsets for a best    * first search.    */  private int m_maxStale;  /** Number of folds for cross validating feature sets */  private int m_CVFolds;  /** Random numbers for use in cross validation */  private Random m_rr;  /** Holds the majority class */  private double m_majority;  /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return          "Class for building and using a simple decision table majority "      + "classifier.\n\n"      + "For more information see: \n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Ron Kohavi");    result.setValue(Field.TITLE, "The Power of Decision Tables");    result.setValue(Field.BOOKTITLE, "8th European Conference on Machine Learning");    result.setValue(Field.YEAR, "1995");    result.setValue(Field.PAGES, "174-189");    result.setValue(Field.PUBLISHER, "Springer");        return result;  }  /**   * Class for a node in a linked list. Used in best first search.   */  public class Link {    /** The group */    BitSet m_group;    /** The merit */    double m_merit;    /**     * The constructor.     *     * @param gr the group     * @param mer the merit     */    public Link (BitSet gr, double mer) {      m_group = (BitSet)gr.clone();      m_merit = mer;    }      /**     * Gets the group.     *      * @return the group     */    public BitSet getGroup() {      return m_group;    }      /**     * Gets the merit.     *      * @return the merit     */    public double getMerit() {      return m_merit;    }    /**     * Returns string representation.     *      * @return a string representation     */    public String toString() {      return ("Node: "+m_group.toString()+"  "+m_merit);    }  }    /**   * Class for handling a linked list. Used in best first search.   * Extends the Vector class.   */  public class LinkedList     extends FastVector {    /** for serialization */    static final long serialVersionUID = -8323010516352768601L;        /**     * Removes an element (Link) at a specific index from the list.     *     * @param index the index of the element to be removed.     * @throws Exception iof index out of range     */    public void removeLinkAt(int index) throws Exception {      if ((index >= 0) && (index < size())) {	removeElementAt(index);      } else {	throw new Exception("index out of range (removeLinkAt)");      }    }    /**     * Returns the element (Link) at a specific index from the list.     *     * @param index the index of the element to be returned.     * @return the link     * @throws Exception if index out of range     */    public Link getLinkAt(int index) throws Exception {      if (size()==0) {	throw new Exception("List is empty (getLinkAt)");      } else if ((index >= 0) && (index < size())) {	return ((Link)(elementAt(index)));      } else {	throw new Exception("index out of range (getLinkAt)");      }    }    /**     * Aadds an element (Link) to the list.     *     * @param gr the feature set specification     * @param mer the "merit" of this feature set     */    public void addToList(BitSet gr, double mer) {      Link newL = new Link(gr, mer);	      if (size()==0) {	addElement(newL);      }      else if (mer > ((Link)(firstElement())).getMerit()) {	insertElementAt(newL,0);      } else {	int i = 0;	int size = size();	boolean done = false;	while ((!done) && (i < size)) {	  if (mer > ((Link)(elementAt(i))).getMerit()) {	    insertElementAt(newL,i);	    done = true;	  } else if (i == size-1) {	    addElement(newL);	    done = true;	  } else {	    i++;	  }	}      }    }  }  /**   * Class providing keys to the hash table   */  public static class hashKey     implements Serializable {        /** for serialization */    static final long serialVersionUID = 5674163500154964602L;        /** Array of attribute values for an instance */    private double [] attributes;        /** True for an index if the corresponding attribute value is missing. */    private boolean [] missing;    /** The key */    private int key;    /**     * Constructor for a hashKey     *     * @param t an instance from which to generate a key     * @param numAtts the number of attributes     * @param ignoreClass if true treat the class as a normal attribute     * @throws Exception if something goes wrong     */    public hashKey(Instance t, int numAtts, boolean ignoreClass) throws Exception {      int i;      int cindex = t.classIndex();      key = -999;      attributes = new double [numAtts];      missing = new boolean [numAtts];      for (i=0;i<numAtts;i++) {	if (i == cindex && !ignoreClass) {	  missing[i] = true;	} else {	  if ((missing[i] = t.isMissing(i)) == false) {	    attributes[i] = t.value(i);	  }	}      }    }    /**     * Convert a hash entry to a string     *     * @param t the set of instances     * @param maxColWidth width to make the fields     * @return string representation of the hash entry     */    public String toString(Instances t, int maxColWidth) {      int i;      int cindex = t.classIndex();      StringBuffer text = new StringBuffer();            for (i=0;i<attributes.length;i++) {	if (i != cindex) {	  if (missing[i]) {	    text.append("?");	    for (int j=0;j<maxColWidth;j++) {	      text.append(" ");	    }	  } else {	    String ss = t.attribute(i).value((int)attributes[i]);	    StringBuffer sb = new StringBuffer(ss);	    	    for (int j=0;j < (maxColWidth-ss.length()+1); j++) {		sb.append(" ");	    }	    text.append(sb);	  }	}      }      return text.toString();    }    /**     * Constructor for a hashKey     *     * @param t an array of feature values     */    public hashKey(double [] t) {      int i;      int l = t.length;      key = -999;      attributes = new double [l];      missing = new boolean [l];      for (i=0;i<l;i++) {	if (t[i] == Double.MAX_VALUE) {	  missing[i] = true;	} else {	  missing[i] = false;	  attributes[i] = t[i];	}      }    }        /**     * Calculates a hash code     *     * @return the hash code as an integer     */    public int hashCode() {      int hv = 0;            if (key != -999)	return key;      for (int i=0;i<attributes.length;i++) {	if (missing[i]) {	  hv += (i*13);	} else {	  hv += (i * 5 * (attributes[i]+1));	}      }      if (key == -999) {	key = hv;      }      return hv;    }    /**     * Tests if two instances are equal     *     * @param b a key to compare with     * @return true if both objects are equal     */    public boolean equals(Object b) {            if ((b == null) || !(b.getClass().equals(this.getClass()))) {        return false;      }      boolean ok = true;      boolean l;      if (b instanceof hashKey) {	hashKey n = (hashKey)b;	for (int i=0;i<attributes.length;i++) {	  l = n.missing[i];	  if (missing[i] || l) {	    if ((missing[i] && !l) || (!missing[i] && l)) {	      ok = false;	      break;	    }	  } else {	    if (attributes[i] != n.attributes[i]) {	      ok = false;	      break;	    }	  }	}      } else {	return false;      }      return ok;    }        /**     * Prints the hash code     */    public void print_hash_code() {            System.out.println("Hash val: "+hashCode());    }  }  /**   * Inserts an instance into the hash table   *   * @param inst instance to be inserted   * @param instA to create the hash key from   * @throws Exception if the instance can't be inserted   */  private void insertIntoTable(Instance inst, double [] instA)       throws Exception {    double [] tempClassDist2;    double [] newDist;    hashKey thekey;    if (instA != null) {

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?