📄 vfi.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* VFI.java
* Copyright (C) 2000 Mark Hall.
*
*/
package weka.classifiers.misc;
import java.util.Enumeration;
import java.util.Vector;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
/**
* Class implementing the voting feature interval classifier. For numeric
* attributes, upper and lower boundaries (intervals) are constructed
* around each class. Discrete attributes have point intervals. Class counts
* are recorded for each interval on each feature. Classification is by
* voting. Missing values are ignored. Does not handle numeric class. <p>
*
* Have added a simple attribute weighting scheme. Higher weight is assigned
* to more confident intervals, where confidence is a function of entropy:
* weight (att_i) = (entropy of class distrib att_i / max uncertainty)^-bias.
* <p>
*
* Faster than NaiveBayes but slower than HyperPipes. <p><p>
*
* <pre>
* Confidence: 0.01 (two tailed)
*
* Dataset (1) VFI '-B | (2) Hyper (3) Naive
* ------------------------------------
* anneal.ORIG (10) 74.56 | 97.88 v 74.77
* anneal (10) 71.83 | 97.88 v 86.51 v
* audiology (10) 51.69 | 66.26 v 72.25 v
* autos (10) 57.63 | 62.79 v 57.76
* balance-scale (10) 68.72 | 46.08 * 90.5 v
* breast-cancer (10) 67.25 | 69.84 v 73.12 v
* wisconsin-breast-cancer (10) 95.72 | 88.31 * 96.05 v
* horse-colic.ORIG (10) 66.13 | 70.41 v 66.12
* horse-colic (10) 78.36 | 62.07 * 78.28
* credit-rating (10) 85.17 | 44.58 * 77.84 *
* german_credit (10) 70.81 | 69.89 * 74.98 v
* pima_diabetes (10) 62.13 | 65.47 v 75.73 v
* Glass (10) 56.82 | 50.19 * 47.43 *
* cleveland-14-heart-diseas (10) 80.01 | 55.18 * 83.83 v
* hungarian-14-heart-diseas (10) 82.8 | 65.55 * 84.37 v
* heart-statlog (10) 79.37 | 55.56 * 84.37 v
* hepatitis (10) 83.78 | 63.73 * 83.87
* hypothyroid (10) 92.64 | 93.33 v 95.29 v
* ionosphere (10) 94.16 | 35.9 * 82.6 *
* iris (10) 96.2 | 91.47 * 95.27 *
* kr-vs-kp (10) 88.22 | 54.1 * 87.84 *
* labor (10) 86.73 | 87.67 93.93 v
* lymphography (10) 78.48 | 58.18 * 83.24 v
* mushroom (10) 99.85 | 99.77 * 95.77 *
* primary-tumor (10) 29 | 24.78 * 49.35 v
* segment (10) 77.42 | 75.15 * 80.1 v
* sick (10) 65.92 | 93.85 v 92.71 v
* sonar (10) 58.02 | 57.17 67.97 v
* soybean (10) 86.81 | 86.12 * 92.9 v
* splice (10) 88.61 | 41.97 * 95.41 v
* vehicle (10) 52.94 | 32.77 * 44.8 *
* vote (10) 91.5 | 61.38 * 90.19 *
* vowel (10) 57.56 | 36.34 * 62.81 v
* waveform (10) 56.33 | 46.11 * 80.02 v
* zoo (10) 94.05 | 94.26 95.04 v
* ------------------------------------
* (v| |*) | (9|3|23) (22|5|8)
* </pre>
* <p>
*
* For more information, see <p>
*
* Demiroz, G. and Guvenir, A. (1997) "Classification by voting feature
* intervals", <i>ECML-97</i>. <p>
*
* Valid options are:<p>
*
* -C <br>
* Don't weight voting intervals by confidence. <p>
*
* -B <bias> <br>
* Set exponential bias towards confident intervals. default = 0.6 <p>
*
* @author Mark Hall (mhall@cs.waikato.ac.nz)
* @version $Revision$
*/
public class VFI extends Classifier
implements OptionHandler, WeightedInstancesHandler {
/** The index of the class attribute */
protected int m_ClassIndex;
/** The number of classes */
protected int m_NumClasses;
/** The training data */
protected Instances m_Instances = null;
/** The class counts for each interval of each attribute */
protected double [][][] m_counts;
/** The global class counts */
protected double [] m_globalCounts;
/** The lower bounds for each attribute */
protected double [][] m_intervalBounds;
/** The maximum entropy for the class */
protected double m_maxEntrop;
/**
 * Whether voting intervals are weighted by confidence. Enabled by default;
 * setOptions() switches it off when the -C flag is given.
 */
protected boolean m_weightByConfidence = true;
/**
 * Bias towards more confident intervals. The value is stored negated:
 * setBias(b) assigns -b and getBias() returns -m_bias, so this initial
 * value corresponds to a user-visible bias of 0.6.
 */
protected double m_bias = -0.6;
/**
 * A very small constant (0.1e-10, i.e. 1e-11).
 * NOTE(review): its use is not visible in this part of the file; presumably
 * a numeric tolerance - confirm against the rest of the class.
 */
private double TINY = 0.1e-10;
/**
 * Returns a string describing this classifier.
 *
 * @return a description of the classifier suitable for
 * displaying in the explorer/experimenter gui
 */
public String globalInfo() {
  return "Classification by voting feature intervals. Intervals are "
    + "constructed around each class for each attribute ("
    + "basically discretization). Class counts are "
    + "recorded for each interval on each attribute. Classification is by "
    + "voting. For more info see Demiroz, G. and Guvenir, A. (1997) "
    + "\"Classification by voting feature intervals\", ECML-97.\n\n"
    + "Have added a simple attribute weighting scheme. Higher weight is "
    + "assigned to more confident intervals, where confidence is a function "
    + "of entropy:\nweight (att_i) = (entropy of class distrib att_i / "
    + "max uncertainty)^-bias";
}
/**
 * Returns an enumeration describing the available options.
 * <p>
 * Two options are listed: the -C flag (no argument) and the -B option
 * (one argument). The help text for -B reports a default of 0.6, which is
 * the value getBias() yields for the initial m_bias of -0.6.
 *
 * @return an enumeration of all the available options.
 */
public Enumeration listOptions() {
  Vector newVector = new Vector(2);

  newVector.addElement(
    new Option("\tDon't weight voting intervals by confidence",
               "C", 0, "-C"));
  newVector.addElement(
    new Option("\tSet exponential bias towards confident intervals\n"
               + "\t(default = 0.6)",
               "B", 1, "-B <bias>"));

  return newVector.elements();
}
/**
 * Parses a given list of options. Valid options are:<p>
 *
 * -C <br>
 * Don't weight voting intervals by confidence. <p>
 *
 * -B <bias> <br>
 * Set exponential bias towards confident intervals. default = 0.6 <p>
 *
 * Confidence weighting is always (re)assigned from the presence or absence
 * of -C. The bias is only changed when a non-empty -B value is supplied;
 * otherwise the current bias is left as it is.
 *
 * @param options the list of options as an array of strings
 * @exception Exception if an option is not supported, or if the -B value
 * cannot be parsed as a double (NumberFormatException)
 */
public void setOptions(String[] options) throws Exception {
  // -C is a negative flag: its presence switches confidence weighting off.
  setWeightByConfidence(!Utils.getFlag('C', options));

  String biasString = Utils.getOption('B', options);
  if (biasString.length() != 0) {
    // Parse straight to a primitive instead of going through the
    // deprecated boxing constructor new Double(String).
    setBias(Double.parseDouble(biasString));
  }

  Utils.checkForRemainingOptions(options);
}
/**
 * Supplies the tip text for the weightByConfidence property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String weightByConfidenceTipText() {
  final String tip = "Weight feature intervals by confidence";
  return tip;
}
/**
 * Turns weighting of feature intervals by confidence on or off.
 *
 * @param c true to weight feature intervals by confidence, false to
 * leave them unweighted
 */
public void setWeightByConfidence(boolean c) {
  this.m_weightByConfidence = c;
}
/**
 * Reports whether feature intervals are weighted by confidence.
 *
 * @return true if weighting by confidence is currently selected
 */
public boolean getWeightByConfidence() {
  final boolean weighted = this.m_weightByConfidence;
  return weighted;
}
/**
 * Supplies the tip text for the bias property.
 *
 * @return a short description of the property, suitable for
 * displaying in the explorer/experimenter gui
 */
public String biasTipText() {
  final String tip = "Strength of bias towards more confident features";
  return tip;
}
/**
 * Sets the exponential bias towards more confident intervals.
 * The value is kept internally with its sign flipped.
 *
 * @param b the value of the bias parameter
 */
public void setBias(double b) {
  final double negated = -b;
  this.m_bias = negated;
}
/**
 * Returns the bias parameter, undoing the sign flip applied to the
 * internally stored value.
 *
 * @return the bias parameter
 */
public double getBias() {
  final double stored = this.m_bias;
  return -stored;
}
/**
 * Gets the current settings of VFI.
 * <p>
 * The returned array always has three slots: an optional "-C" (only when
 * confidence weighting is off), then "-B" and the current bias. Any slot
 * left over is filled with an empty string.
 *
 * @return an array of strings suitable for passing to setOptions()
 */
public String[] getOptions () {
  final String[] settings = new String[3];
  int next = 0;

  if (!getWeightByConfidence()) {
    settings[next] = "-C";
    next++;
  }

  settings[next] = "-B";
  next++;
  settings[next] = "" + getBias();
  next++;

  // Pad whatever remains so that no slot is left null.
  for (int i = next; i < settings.length; i++) {
    settings[i] = "";
  }
  return settings;
}
/**
* Generates the classifier.
*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -