📄 decisiontable.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * DecisionTable.java * Copyright (C) 1999 Mark Hall * */package weka.classifiers;import java.io.*;import java.util.*;import weka.core.*;import weka.filters.*;import weka.classifiers.*;import weka.classifiers.j48.*;/** * Class for building and using a simple decision table majority classifier. * For more information see: <p> * * Kohavi R. (1995).<i> The Power of Decision Tables.</i> In Proc * European Conference on Machine Learning.<p> * * Valid options are: <p> * * -S num <br> * Number of fully expanded non improving subsets to consider * before terminating a best first search. * (Default = 5) <p> * * -X num <br> * Use cross validation to evaluate features. Use number of folds = 1 for * leave one out CV. (Default = leave one out CV) <p> * * -I <br> * Use nearest neighbour instead of global table majority. <p> * * -R <br> * Prints the decision table. <p> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.21 $ */public class DecisionTable extends DistributionClassifier implements OptionHandler, WeightedInstancesHandler, AdditionalMeasureProducer { /** The hashtable used to hold training instances */ private Hashtable m_entries; /** Holds the final feature set */ private int [] m_decisionFeatures; /** Discretization filter */ private DiscretizeFilter m_disTransform; /** Filter used to remove columns discarded by feature selection */ private AttributeFilter m_delTransform; /** IB1 used to classify non matching instances rather than majority class */ private IBk m_ibk; /** Holds the training instances */ private Instances m_theInstances; /** The number of attributes in the dataset */ private int m_numAttributes; /** The number of instances in the dataset */ private int m_numInstances; /** Class is nominal */ private boolean m_classIsNominal; /** Output debug info */ private boolean m_debug; /** Use the IBk classifier rather than majority class */ private boolean m_useIBk; /** Display Rules */ private boolean m_displayRules; /** * Maximum number of fully expanded non improving subsets for a best * first search. */ private int m_maxStale; /** Number of folds for cross validating feature sets */ private int m_CVFolds; /** Random numbers for use in cross validation */ private Random m_rr; /** Holds the majority class */ private double m_majority; /** * Class for a node in a linked list. Used in best first search. */ public class Link { /** The group */ BitSet m_group; /** The merit */ double m_merit; /** * The constructor. * * @param gr the group * @param mer the merit */ public Link (BitSet gr, double mer) { m_group = (BitSet)gr.clone(); m_merit = mer; } /** * Gets the group. */ public BitSet getGroup() { return m_group; } /** * Gets the merit. */ public double getMerit() { return m_merit; } /** * Returns string representation. */ public String toString() { return ("Node: "+m_group.toString()+" "+m_merit); } } /** * Class for handling a linked list. Used in best first search. * Extends the Vector class. */ public class LinkedList extends FastVector { /** * Removes an element (Link) at a specific index from the list. * * @param index the index of the element to be removed. */ public void removeLinkAt(int index) throws Exception { if ((index >= 0) && (index < size())) { removeElementAt(index); } else { throw new Exception("index out of range (removeLinkAt)"); } } /** * Returns the element (Link) at a specific index from the list. * * @param index the index of the element to be returned. */ public Link getLinkAt(int index) throws Exception { if (size()==0) { throw new Exception("List is empty (getLinkAt)"); } else if ((index >= 0) && (index < size())) { return ((Link)(elementAt(index))); } else { throw new Exception("index out of range (getLinkAt)"); } } /** * Aadds an element (Link) to the list. * * @param gr the feature set specification * @param mer the "merit" of this feature set */ public void addToList(BitSet gr, double mer) { Link newL = new Link(gr, mer); if (size()==0) { addElement(newL); } else if (mer > ((Link)(firstElement())).getMerit()) { insertElementAt(newL,0); } else { int i = 0; int size = size(); boolean done = false; while ((!done) && (i < size)) { if (mer > ((Link)(elementAt(i))).getMerit()) { insertElementAt(newL,i); done = true; } else if (i == size-1) { addElement(newL); done = true; } else { i++; } } } } } /** * Class providing keys to the hash table */ public class hashKey implements Serializable { /** Array of attribute values for an instance */ private double [] attributes; /** True for an index if the corresponding attribute value is missing. */ private boolean [] missing; /** The values */ private String [] values; /** The key */ private int key; /** * Constructor for a hashKey * * @param t an instance from which to generate a key * @param numAtts the number of attributes */ public hashKey(Instance t, int numAtts) throws Exception { int i; int cindex = t.classIndex(); key = -999; attributes = new double [numAtts]; missing = new boolean [numAtts]; for (i=0;i<numAtts;i++) { if (i == cindex) { missing[i] = true; } else { if ((missing[i] = t.isMissing(i)) == false) { attributes[i] = t.value(i); } } } } /** * Convert a hash entry to a string * * @param t the set of instances * @param maxColWidth width to make the fields */ public String toString(Instances t, int maxColWidth) { int i; int cindex = t.classIndex(); StringBuffer text = new StringBuffer(); for (i=0;i<attributes.length;i++) { if (i != cindex) { if (missing[i]) { text.append("?"); for (int j=0;j<maxColWidth;j++) { text.append(" "); } } else { String ss = t.attribute(i).value((int)attributes[i]); StringBuffer sb = new StringBuffer(ss); for (int j=0;j < (maxColWidth-ss.length()+1); j++) { sb.append(" "); } text.append(sb); } } } return text.toString(); } /** * Constructor for a hashKey * * @param t an array of feature values */ public hashKey(double [] t) { int i; int l = t.length; key = -999; attributes = new double [l]; missing = new boolean [l]; for (i=0;i<l;i++) { if (t[i] == Double.MAX_VALUE) { missing[i] = true; } else { missing[i] = false; attributes[i] = t[i]; } } } /** * Calculates a hash code * * @return the hash code as an integer */ public int hashCode() { int hv = 0; if (key != -999) return key; for (int i=0;i<attributes.length;i++) { if (missing[i]) { hv += (i*13); } else { hv += (i * 5 * (attributes[i]+1)); } } if (key == -999) { key = hv; } return hv; } /** * Tests if two instances are equal * * @param b a key to compare with */ public boolean equals(Object b) { if ((b == null) || !(b.getClass().equals(this.getClass()))) { return false; } boolean ok = true; boolean l; if (b instanceof hashKey) { hashKey n = (hashKey)b; for (int i=0;i<attributes.length;i++) { l = n.missing[i]; if (missing[i] || l) { if ((missing[i] && !l) || (!missing[i] && l)) { ok = false; break; } } else { if (attributes[i] != n.attributes[i]) { ok = false; break; } } } } else { return false; } return ok; } /** * Prints the hash code */ public void print_hash_code() { System.out.println("Hash val: "+hashCode()); } } /** * Inserts an instance into the hash table * * @param inst instance to be inserted * @exception Exception if the instance can't be inserted */ private void insertIntoTable(Instance inst, double [] instA) throws Exception { double [] tempClassDist2; double [] newDist; hashKey thekey; if (instA != null) { thekey = new hashKey(instA); } else { thekey = new hashKey(inst, inst.numAttributes()); } // see if this one is already in the table tempClassDist2 = (double []) m_entries.get(thekey); if (tempClassDist2 == null) { if (m_classIsNominal) { newDist = new double [m_theInstances.classAttribute().numValues()]; newDist[(int)inst.classValue()] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } else { newDist = new double [2]; newDist[0] = inst.classValue() * inst.weight(); newDist[1] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } } else { // update the distribution for this instance if (m_classIsNominal) { tempClassDist2[(int)inst.classValue()]+=inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } else { tempClassDist2[0] += (inst.classValue() * inst.weight()); tempClassDist2[1] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } } } /** * Classifies an instance for internal leave one out cross validation * of feature sets * * @param instance instance to be "left out" and classified
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -