📄 vfi.java
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    VFI.java
 *    Copyright (C) 2000 Mark Hall.
 *
 */

package weka.classifiers.misc;

import weka.classifiers.Evaluation;
import weka.classifiers.Classifier;
import weka.classifiers.DistributionClassifier;
import weka.core.Attribute;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Utils;
import weka.core.OptionHandler;
import weka.core.Option;
import weka.core.WeightedInstancesHandler;
import weka.core.UnsupportedClassTypeException;
import java.io.*;
import java.util.Enumeration;
import java.util.Vector;

/**
 * Class implementing the voting feature interval classifier. For numeric
 * attributes, upper and lower boundaries (intervals) are constructed
 * around each class. Discrete attributes have point intervals. Class counts
 * are recorded for each interval on each feature. Classification is by
 * voting. Missing values are ignored. Does not handle numeric class. <p>
 *
 * Have added a simple attribute weighting scheme. Higher weight is assigned
 * to more confident intervals, where confidence is a function of entropy:
 * weight (att_i) = (entropy of class distrib att_i / max uncertainty)^-bias.
 * <p>
 *
 * Faster than NaiveBayes but slower than HyperPipes. <p><p>
 *
 * <pre>
 * Confidence: 0.01 (two tailed)
 *
 * Dataset                   (1) VFI '-B  | (2) Hyper  (3) Naive
 * ------------------------------------
 * anneal.ORIG               (10)   74.56 |   97.88 v   74.77
 * anneal                    (10)   71.83 |   97.88 v   86.51 v
 * audiology                 (10)   51.69 |   66.26 v   72.25 v
 * autos                     (10)   57.63 |   62.79 v   57.76
 * balance-scale             (10)   68.72 |   46.08 *   90.5  v
 * breast-cancer             (10)   67.25 |   69.84 v   73.12 v
 * wisconsin-breast-cancer   (10)   95.72 |   88.31 *   96.05 v
 * horse-colic.ORIG          (10)   66.13 |   70.41 v   66.12
 * horse-colic               (10)   78.36 |   62.07 *   78.28
 * credit-rating             (10)   85.17 |   44.58 *   77.84 *
 * german_credit             (10)   70.81 |   69.89 *   74.98 v
 * pima_diabetes             (10)   62.13 |   65.47 v   75.73 v
 * Glass                     (10)   56.82 |   50.19 *   47.43 *
 * cleveland-14-heart-diseas (10)   80.01 |   55.18 *   83.83 v
 * hungarian-14-heart-diseas (10)   82.8  |   65.55 *   84.37 v
 * heart-statlog             (10)   79.37 |   55.56 *   84.37 v
 * hepatitis                 (10)   83.78 |   63.73 *   83.87
 * hypothyroid               (10)   92.64 |   93.33 v   95.29 v
 * ionosphere                (10)   94.16 |   35.9  *   82.6  *
 * iris                      (10)   96.2  |   91.47 *   95.27 *
 * kr-vs-kp                  (10)   88.22 |   54.1  *   87.84 *
 * labor                     (10)   86.73 |   87.67     93.93 v
 * lymphography              (10)   78.48 |   58.18 *   83.24 v
 * mushroom                  (10)   99.85 |   99.77 *   95.77 *
 * primary-tumor             (10)   29    |   24.78 *   49.35 v
 * segment                   (10)   77.42 |   75.15 *   80.1  v
 * sick                      (10)   65.92 |   93.85 v   92.71 v
 * sonar                     (10)   58.02 |   57.17     67.97 v
 * soybean                   (10)   86.81 |   86.12 *   92.9  v
 * splice                    (10)   88.61 |   41.97 *   95.41 v
 * vehicle                   (10)   52.94 |   32.77 *   44.8  *
 * vote                      (10)   91.5  |   61.38 *   90.19 *
 * vowel                     (10)   57.56 |   36.34 *   62.81 v
 * waveform                  (10)   56.33 |   46.11 *   80.02 v
 * zoo                       (10)   94.05 |   94.26     95.04 v
 * ------------------------------------
 * (v| |*)                           |   (9|3|23)   (22|5|8)
 * </pre>
 * <p>
 *
 * For more information, see <p>
 *
 * Demiroz, G. and Guvenir, A. (1997) "Classification by voting feature
 * intervals", <i>ECML-97</i>. <p>
 *
 * Valid options are:<p>
 *
 * -C <br>
 * Don't weight voting intervals by confidence. <p>
 *
 * -B <bias> <br>
 * Set exponential bias towards confident intervals. default = 1.0 <p>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.1.1.1 $
 */
public class VFI extends DistributionClassifier
  implements OptionHandler, WeightedInstancesHandler {

  /** The index of the class attribute */
  protected int m_ClassIndex;

  /** The number of classes */
  protected int m_NumClasses;

  /** The training data */
  protected Instances m_Instances = null;

  /** The class counts for each interval of each attribute */
  protected double [][][] m_counts;

  /** The global class counts */
  protected double [] m_globalCounts;

  /** The lower bounds for each attribute */
  protected double [][] m_intervalBounds;

  /** The maximum entropy for the class */
  protected double m_maxEntrop;

  /** Exponentially bias more confident intervals */
  protected boolean m_weightByConfidence = true;

  /** Bias towards more confident intervals */
  protected double m_bias = -0.6;

  private double TINY = 0.1e-10;

  /**
   * Returns a string describing this classifier
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return "Classification by voting feature intervals. Intervals are "
      + "constructed around each class for each attribute ("
      + "basically discretization). Class counts are "
      + "recorded for each interval on each attribute. Classification is by "
      + "voting. For more info see Demiroz, G. and Guvenir, A. (1997) "
      + "\"Classification by voting feature intervals\", ECML-97.\n\n"
      + "Have added a simple attribute weighting scheme. Higher weight is "
      + "assigned to more confident intervals, where confidence is a function "
      + "of entropy:\nweight (att_i) = (entropy of class distrib att_i / "
      + "max uncertainty)^-bias";
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {
    Vector newVector = new Vector(2);

    newVector.addElement(
      new Option("\tDon't weight voting intervals by confidence", "C", 0,
                 "-C"));
    newVector.addElement(
      new Option("\tSet exponential bias towards confident intervals\n"
                 + "\t(default = 1.0)", "B", 1,
                 "-B <bias>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -C <br>
   * Don't weight voting intervals by confidence. <p>
   *
   * -B <bias> <br>
   * Set exponential bias towards confident intervals. default = 1.0 <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {
    String optionString;

    setWeightByConfidence(!Utils.getFlag('C', options));

    optionString = Utils.getOption('B', options);
    if (optionString.length() != 0) {
      Double temp = new Double(optionString);
      setBias(temp.doubleValue());
    }

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String weightByConfidenceTipText() {
    return "Weight feature intervals by confidence";
  }

  /**
   * Set weighting by confidence
   * @param c true if feature intervals are to be weighted by confidence
   */
  public void setWeightByConfidence(boolean c) {
    m_weightByConfidence = c;
  }

  /**
   * Get whether feature intervals are being weighted by confidence
   * @return true if weighting by confidence is selected
   */
  public boolean getWeightByConfidence() {
    return m_weightByConfidence;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String biasTipText() {
    return "Strength of bias towards more confident features";
  }

  /**
   * Set the value of the exponential bias towards more confident intervals
   * @param b the value of the bias parameter
   */
  public void setBias(double b) {
    m_bias = -b;
  }

  /**
   * Get the value of the bias parameter
   * @return the bias parameter
   */
  public double getBias() {
    return -m_bias;
  }

  /**
   * Gets the current settings of VFI
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions() {
    String[] options = new String[3];
    int current = 0;

    if (!getWeightByConfidence()) {
      options[current++] = "-C";
    }

    options[current++] = "-B";
    options[current++] = "" + getBias();

    // Pad any unused slots and return the option array
    while (current < options.length) {
      options[current++] = "";
    }
    return options;
  }

  // (The listing is truncated here in the original; the remainder of the
  // class, e.g. buildClassifier and distributionForInstance, is not shown.)
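Below is a minimal usage sketch, separate from VFI.java itself, showing how the classifier would typically be driven through the Weka 3.x API that this class is written against. The wrapper class VFIExample and the file name "weather.arff" are hypothetical; the calls used (setWeightByConfidence, setBias, buildClassifier, distributionForInstance) all appear in, or are required by, the class above and its DistributionClassifier superclass.

import java.io.BufferedReader;
import java.io.FileReader;
import weka.classifiers.misc.VFI;
import weka.core.Instance;
import weka.core.Instances;

public class VFIExample {
  public static void main(String[] args) throws Exception {
    // Load training data and mark the last attribute as the class
    Instances data =
      new Instances(new BufferedReader(new FileReader("weather.arff")));
    data.setClassIndex(data.numAttributes() - 1);

    // Configure VFI: weight intervals by confidence (i.e. no -C flag)
    // and use an exponential bias of 1.0 (equivalent to -B 1.0)
    VFI vfi = new VFI();
    vfi.setWeightByConfidence(true);
    vfi.setBias(1.0);
    vfi.buildClassifier(data);

    // Vote-based class membership scores for the first training instance
    Instance first = data.instance(0);
    double[] dist = vfi.distributionForInstance(first);
    for (int i = 0; i < dist.length; i++) {
      System.out.println(data.classAttribute().value(i) + ": " + dist[i]);
    }
  }
}

On the design choice: per the class documentation, the -B exponent controls how strongly low-entropy (more confident) intervals dominate the vote, while -C turns weighting off entirely so every interval votes with equal weight.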