📄 decisiontable.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* DecisionTable.java
* Copyright (C) 1999 Mark Hall
*
*/
package weka.classifiers.rules;
import java.io.Serializable;
import java.util.BitSet;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.lazy.IBk;
import weka.core.AdditionalMeasureProducer;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedAttributeTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;
/**
* Class for building and using a simple decision table majority classifier.
* For more information see: <p>
*
* Kohavi R. (1995).<i> The Power of Decision Tables.</i> In Proc
* European Conference on Machine Learning.<p>
*
* Valid options are: <p>
*
* -S num <br>
* Number of fully expanded non improving subsets to consider
* before terminating a best first search.
* (Default = 5) <p>
*
* -X num <br>
* Use cross validation to evaluate features. Use number of folds = 1 for
* leave one out CV. (Default = leave one out CV) <p>
*
* -I <br>
* Use nearest neighbour instead of global table majority. <p>
*
* -R <br>
* Prints the decision table. <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision$
*/
public class DecisionTable extends Classifier
implements OptionHandler, WeightedInstancesHandler,
AdditionalMeasureProducer {
/** The hashtable used to hold training instances */
private Hashtable m_entries;
/** Holds the final feature set */
private int [] m_decisionFeatures;
/** Discretization filter */
private Filter m_disTransform;
/** Filter used to remove columns discarded by feature selection */
private Remove m_delTransform;
/** IB1 used to classify non matching instances rather than majority class */
private IBk m_ibk;
/** Holds the training instances */
private Instances m_theInstances;
/** The number of attributes in the dataset */
private int m_numAttributes;
/** The number of instances in the dataset */
private int m_numInstances;
/** Class is nominal */
private boolean m_classIsNominal;
/** Output debug info */
private boolean m_debug;
/** Use the IBk classifier rather than majority class */
private boolean m_useIBk;
/** Display Rules */
private boolean m_displayRules;
/**
* Maximum number of fully expanded non improving subsets for a best
* first search.
*/
private int m_maxStale;
/** Number of folds for cross validating feature sets */
private int m_CVFolds;
/** Random numbers for use in cross validation */
private Random m_rr;
/** Holds the majority class */
private double m_majority;
/**
* Returns a string describing classifier
* @return a description suitable for
* displaying in the explorer/experimenter gui
*/
public String globalInfo() {
return "Class for building and using a simple decision table majority "
+ "classifier. For more information see: \n\n"
+ "Kohavi R. (1995). \"The Power of Decision Tables.\" In Proc "
+ "European Conference on Machine Learning.";
}
/**
* Class for a node in a linked list. Used in best first search.
*/
public class Link {
/** The group */
BitSet m_group;
/** The merit */
double m_merit;
/**
* The constructor.
*
* @param gr the group
* @param mer the merit
*/
public Link (BitSet gr, double mer) {
m_group = (BitSet)gr.clone();
m_merit = mer;
}
/**
* Gets the group.
*/
public BitSet getGroup() {
return m_group;
}
/**
* Gets the merit.
*/
public double getMerit() {
return m_merit;
}
/**
* Returns string representation.
*/
public String toString() {
return ("Node: "+m_group.toString()+" "+m_merit);
}
}
/**
* Class for handling a linked list. Used in best first search.
* Extends the Vector class.
*/
public class LinkedList extends FastVector {
/**
* Removes an element (Link) at a specific index from the list.
*
* @param index the index of the element to be removed.
*/
public void removeLinkAt(int index) throws Exception {
if ((index >= 0) && (index < size())) {
removeElementAt(index);
} else {
throw new Exception("index out of range (removeLinkAt)");
}
}
/**
* Returns the element (Link) at a specific index from the list.
*
* @param index the index of the element to be returned.
*/
public Link getLinkAt(int index) throws Exception {
if (size()==0) {
throw new Exception("List is empty (getLinkAt)");
} else if ((index >= 0) && (index < size())) {
return ((Link)(elementAt(index)));
} else {
throw new Exception("index out of range (getLinkAt)");
}
}
/**
* Aadds an element (Link) to the list.
*
* @param gr the feature set specification
* @param mer the "merit" of this feature set
*/
public void addToList(BitSet gr, double mer) {
Link newL = new Link(gr, mer);
if (size()==0) {
addElement(newL);
}
else if (mer > ((Link)(firstElement())).getMerit()) {
insertElementAt(newL,0);
} else {
int i = 0;
int size = size();
boolean done = false;
while ((!done) && (i < size)) {
if (mer > ((Link)(elementAt(i))).getMerit()) {
insertElementAt(newL,i);
done = true;
} else if (i == size-1) {
addElement(newL);
done = true;
} else {
i++;
}
}
}
}
}
/**
* Class providing keys to the hash table
*/
public static class hashKey implements Serializable {
/** Array of attribute values for an instance */
private double [] attributes;
/** True for an index if the corresponding attribute value is missing. */
private boolean [] missing;
/** The values */
private String [] values;
/** The key */
private int key;
/**
* Constructor for a hashKey
*
* @param t an instance from which to generate a key
* @param numAtts the number of attributes
*/
public hashKey(Instance t, int numAtts) throws Exception {
int i;
int cindex = t.classIndex();
key = -999;
attributes = new double [numAtts];
missing = new boolean [numAtts];
for (i=0;i<numAtts;i++) {
if (i == cindex) {
missing[i] = true;
} else {
if ((missing[i] = t.isMissing(i)) == false) {
attributes[i] = t.value(i);
}
}
}
}
/**
* Convert a hash entry to a string
*
* @param t the set of instances
* @param maxColWidth width to make the fields
*/
public String toString(Instances t, int maxColWidth) {
int i;
int cindex = t.classIndex();
StringBuffer text = new StringBuffer();
for (i=0;i<attributes.length;i++) {
if (i != cindex) {
if (missing[i]) {
text.append("?");
for (int j=0;j<maxColWidth;j++) {
text.append(" ");
}
} else {
String ss = t.attribute(i).value((int)attributes[i]);
StringBuffer sb = new StringBuffer(ss);
for (int j=0;j < (maxColWidth-ss.length()+1); j++) {
sb.append(" ");
}
text.append(sb);
}
}
}
return text.toString();
}
/**
* Constructor for a hashKey
*
* @param t an array of feature values
*/
public hashKey(double [] t) {
int i;
int l = t.length;
key = -999;
attributes = new double [l];
missing = new boolean [l];
for (i=0;i<l;i++) {
if (t[i] == Double.MAX_VALUE) {
missing[i] = true;
} else {
missing[i] = false;
attributes[i] = t[i];
}
}
}
/**
* Calculates a hash code
*
* @return the hash code as an integer
*/
public int hashCode() {
int hv = 0;
if (key != -999)
return key;
for (int i=0;i<attributes.length;i++) {
if (missing[i]) {
hv += (i*13);
} else {
hv += (i * 5 * (attributes[i]+1));
}
}
if (key == -999) {
key = hv;
}
return hv;
}
/**
* Tests if two instances are equal
*
* @param b a key to compare with
*/
public boolean equals(Object b) {
if ((b == null) || !(b.getClass().equals(this.getClass()))) {
return false;
}
boolean ok = true;
boolean l;
if (b instanceof hashKey) {
hashKey n = (hashKey)b;
for (int i=0;i<attributes.length;i++) {
l = n.missing[i];
if (missing[i] || l) {
if ((missing[i] && !l) || (!missing[i] && l)) {
ok = false;
break;
}
} else {
if (attributes[i] != n.attributes[i]) {
ok = false;
break;
}
}
}
} else {
return false;
}
return ok;
}
/**
* Prints the hash code
*/
public void print_hash_code() {
System.out.println("Hash val: "+hashCode());
}
}
/**
* Inserts an instance into the hash table
*
* @param inst instance to be inserted
* @exception Exception if the instance can't be inserted
*/
private void insertIntoTable(Instance inst, double [] instA)
throws Exception {
double [] tempClassDist2;
double [] newDist;
hashKey thekey;
if (instA != null) {
thekey = new hashKey(instA);
} else {
thekey = new hashKey(inst, inst.numAttributes());
}
// see if this one is already in the table
tempClassDist2 = (double []) m_entries.get(thekey);
if (tempClassDist2 == null) {
if (m_classIsNominal) {
newDist = new double [m_theInstances.classAttribute().numValues()];
newDist[(int)inst.classValue()] = inst.weight();
// add to the table
m_entries.put(thekey, newDist);
} else {
newDist = new double [2];
newDist[0] = inst.classValue() * inst.weight();
newDist[1] = inst.weight();
// add to the table
m_entries.put(thekey, newDist);
}
} else {
// update the distribution for this instance
if (m_classIsNominal) {
tempClassDist2[(int)inst.classValue()]+=inst.weight();
// update the table
m_entries.put(thekey, tempClassDist2);
} else {
tempClassDist2[0] += (inst.classValue() * inst.weight());
tempClassDist2[1] += inst.weight();
// update the table
m_entries.put(thekey, tempClassDist2);
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -