📄 decisiontable.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * DecisionTable.java * Copyright (C) 1999 Mark Hall * */package weka.classifiers.rules;import weka.classifiers.Classifier;import weka.classifiers.lazy.IBk;import weka.classifiers.Evaluation;import weka.core.AdditionalMeasureProducer;import weka.core.Capabilities;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.core.SelectedTag;import weka.filters.Filter;import weka.filters.unsupervised.attribute.Remove;import weka.attributeSelection.SubsetEvaluator;import weka.attributeSelection.ASSearch;import weka.attributeSelection.BestFirst;import java.io.Serializable;import java.util.Arrays;import java.util.BitSet;import java.util.Enumeration;import java.util.Hashtable;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Class for building and using a simple decision table majority classifier.<br/> * <br/> * For more information see: <br/> * <br/> * Ron Kohavi: The Power of Decision Tables. In: 8th European Conference on Machine Learning, 174-189, 1995. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * @inproceedings{Kohavi1995, * author = {Ron Kohavi}, * booktitle = {8th European Conference on Machine Learning}, * pages = {174-189}, * publisher = {Springer}, * title = {The Power of Decision Tables}, * year = {1995} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> * * <pre> -X <number of folds> * Use cross validation to evaluate features. * Use number of folds = 1 for leave one out CV. * (Default = leave one out CV)</pre> * * <pre> -I * Use nearest neighbour instead of global table majority.</pre> * * <pre> -R * Display decision table rules. * </pre> * <!-- options-end --> * * @author Mark Hall (mhall@cs.waikato.ac.nz) * @version $Revision: 1.39 $ */public class DecisionTable extends Classifier implements OptionHandler, WeightedInstancesHandler, AdditionalMeasureProducer, TechnicalInformationHandler { /** for serialization */ static final long serialVersionUID = 2888557078165701326L; //static final long serialVersionUID = 2888557078885701326L; /** The hashtable used to hold training instances */ protected Hashtable m_entries; /** The class priors to use when there is no match in the table */ protected double [] m_classPriorCounts; protected double [] m_classPriors; /** Holds the final feature set */ protected int [] m_decisionFeatures; /** Discretization filter */ protected Filter m_disTransform; /** Filter used to remove columns discarded by feature selection */ protected Remove m_delTransform; /** IB1 used to classify non matching instances rather than majority class */ protected IBk m_ibk; /** Holds the original training instances */ protected Instances m_theInstances; /** Holds the final feature selected set of instances */ protected Instances m_dtInstances; /** The number of attributes in the dataset */ protected int m_numAttributes; /** The number of instances in the dataset */ private int m_numInstances; /** Class is nominal */ protected boolean m_classIsNominal; /** Use the IBk classifier rather than majority class */ protected boolean m_useIBk; /** Display Rules */ protected boolean m_displayRules; /** Number of folds for cross validating feature sets */ private int m_CVFolds; /** Random numbers for use in cross validation */ private Random m_rr; /** Holds the majority class */ protected double m_majority; /** The search method to use */ protected ASSearch m_search = new BestFirst(); /** Our own internal evaluator */ protected SubsetEvaluator m_evaluator; /** The evaluation object used to evaluate subsets */ protected Evaluation m_evaluation; /** default is accuracy for discrete class and RMSE for numeric class */ public static final int EVAL_DEFAULT = 1; public static final int EVAL_ACCURACY = 2; public static final int EVAL_RMSE = 3; public static final int EVAL_MAE = 4; public static final int EVAL_AUC = 5; public static final Tag [] TAGS_EVALUATION = { new Tag(EVAL_DEFAULT, "Default: accuracy (discrete class); RMSE (numeric class)"), new Tag(EVAL_ACCURACY, "Accuracy (discrete class only"), new Tag(EVAL_RMSE, "RMSE (of the class probabilities for discrete class)"), new Tag(EVAL_MAE, "MAE (of the class probabilities for discrete class)"), new Tag(EVAL_AUC, "AUC (area under the ROC curve - discrete class only)") }; protected int m_evaluationMeasure = EVAL_DEFAULT; /** * Returns a string describing classifier * @return a description suitable for * displaying in the explorer/experimenter gui */ public String globalInfo() { return "Class for building and using a simple decision table majority " + "classifier.\n\n" + "For more information see: \n\n" + getTechnicalInformation().toString(); } /** * Returns an instance of a TechnicalInformation object, containing * detailed information about the technical background of this class, * e.g., paper reference or book this class is based on. * * @return the technical information about this class */ public TechnicalInformation getTechnicalInformation() { TechnicalInformation result; result = new TechnicalInformation(Type.INPROCEEDINGS); result.setValue(Field.AUTHOR, "Ron Kohavi"); result.setValue(Field.TITLE, "The Power of Decision Tables"); result.setValue(Field.BOOKTITLE, "8th European Conference on Machine Learning"); result.setValue(Field.YEAR, "1995"); result.setValue(Field.PAGES, "174-189"); result.setValue(Field.PUBLISHER, "Springer"); return result; } /** * Class providing keys to the hash table */ public static class hashKey implements Serializable { /** for serialization */ static final long serialVersionUID = 5674163500154964602L; /** Array of attribute values for an instance */ private double [] attributes; /** True for an index if the corresponding attribute value is missing. */ private boolean [] missing; /** The key */ private int key; /** * Constructor for a hashKey * * @param t an instance from which to generate a key * @param numAtts the number of attributes * @param ignoreClass if true treat the class as a normal attribute * @throws Exception if something goes wrong */ public hashKey(Instance t, int numAtts, boolean ignoreClass) throws Exception { int i; int cindex = t.classIndex(); key = -999; attributes = new double [numAtts]; missing = new boolean [numAtts]; for (i=0;i<numAtts;i++) { if (i == cindex && !ignoreClass) { missing[i] = true; } else { if ((missing[i] = t.isMissing(i)) == false) { attributes[i] = t.value(i); } } } } /** * Convert a hash entry to a string * * @param t the set of instances * @param maxColWidth width to make the fields * @return string representation of the hash entry */ public String toString(Instances t, int maxColWidth) { int i; int cindex = t.classIndex(); StringBuffer text = new StringBuffer(); for (i=0;i<attributes.length;i++) { if (i != cindex) { if (missing[i]) { text.append("?"); for (int j=0;j<maxColWidth;j++) { text.append(" "); } } else { String ss = t.attribute(i).value((int)attributes[i]); StringBuffer sb = new StringBuffer(ss); for (int j=0;j < (maxColWidth-ss.length()+1); j++) { sb.append(" "); } text.append(sb); } } } return text.toString(); } /** * Constructor for a hashKey * * @param t an array of feature values */ public hashKey(double [] t) { int i; int l = t.length; key = -999; attributes = new double [l]; missing = new boolean [l]; for (i=0;i<l;i++) { if (t[i] == Double.MAX_VALUE) { missing[i] = true; } else { missing[i] = false; attributes[i] = t[i]; } } } /** * Calculates a hash code * * @return the hash code as an integer */ public int hashCode() { int hv = 0; if (key != -999) return key; for (int i=0;i<attributes.length;i++) { if (missing[i]) { hv += (i*13); } else { hv += (i * 5 * (attributes[i]+1)); } } if (key == -999) { key = hv; } return hv; } /** * Tests if two instances are equal * * @param b a key to compare with * @return true if both objects are equal */ public boolean equals(Object b) { if ((b == null) || !(b.getClass().equals(this.getClass()))) { return false; } boolean ok = true; boolean l; if (b instanceof hashKey) { hashKey n = (hashKey)b; for (int i=0;i<attributes.length;i++) { l = n.missing[i]; if (missing[i] || l) { if ((missing[i] && !l) || (!missing[i] && l)) { ok = false; break; } } else { if (attributes[i] != n.attributes[i]) { ok = false; break; } } } } else { return false; } return ok; } /** * Prints the hash code */ public void print_hash_code() { System.out.println("Hash val: "+hashCode()); } } /** * Inserts an instance into the hash table * * @param inst instance to be inserted * @param instA to create the hash key from * @throws Exception if the instance can't be inserted */ private void insertIntoTable(Instance inst, double [] instA) throws Exception { double [] tempClassDist2; double [] newDist; hashKey thekey; if (instA != null) { thekey = new hashKey(instA); } else { thekey = new hashKey(inst, inst.numAttributes(), false); } // see if this one is already in the table tempClassDist2 = (double []) m_entries.get(thekey); if (tempClassDist2 == null) { if (m_classIsNominal) { newDist = new double [m_theInstances.classAttribute().numValues()]; newDist[(int)inst.classValue()] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } else { newDist = new double [2]; newDist[0] = inst.classValue() * inst.weight(); newDist[1] = inst.weight(); // add to the table m_entries.put(thekey, newDist); } } else { // update the distribution for this instance if (m_classIsNominal) { tempClassDist2[(int)inst.classValue()]+=inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } else { tempClassDist2[0] += (inst.classValue() * inst.weight()); tempClassDist2[1] += inst.weight(); // update the table m_entries.put(thekey, tempClassDist2); } } } /** * Classifies an instance for internal leave one out cross validation * of feature sets * * @param instance instance to be "left out" and classified * @param instA feature values of the selected features for the instance * @return the classification of the instance * @throws Exception if something goes wrong */ double evaluateInstanceLeaveOneOut(Instance instance, double [] instA) throws Exception { hashKey thekey; double [] tempDist; double [] normDist; thekey = new hashKey(instA); if (m_classIsNominal) { // if this one is not in the table if ((tempDist = (double [])m_entries.get(thekey)) == null) { throw new Error("This should never happen!"); } else { normDist = new double [tempDist.length]; System.arraycopy(tempDist,0,normDist,0,tempDist.length); normDist[(int)instance.classValue()] -= instance.weight(); // update the table // first check to see if the class counts are all zero now boolean ok = false; for (int i=0;i<normDist.length;i++) { if (!Utils.eq(normDist[i],0.0)) { ok = true; break; } }// downdate the class prior counts m_classPriorCounts[(int)instance.classValue()] -= instance.weight(); double [] classPriors = m_classPriorCounts.clone(); Utils.normalize(classPriors); if (!ok) { // majority class normDist = classPriors; } m_classPriorCounts[(int)instance.classValue()] += instance.weight(); //if (ok) { Utils.normalize(normDist); if (m_evaluationMeasure == EVAL_AUC) { m_evaluation.evaluateModelOnceAndRecordPrediction(normDist, instance); } else { m_evaluation.evaluateModelOnce(normDist, instance); } return Utils.maxIndex(normDist);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -