⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfi.java

📁 一个数据挖掘软件ALPHAMINERR的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    VFI.java
 *    Copyright (C) 2000 Mark Hall.
 *
 */

package weka.classifiers.misc;

import java.util.Enumeration;
import java.util.Vector;

import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.UnsupportedClassTypeException;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

/**
 * Class implementing the voting feature interval classifier. For numeric
 * attributes, upper and lower boundaries (intervals)  are constructed 
 * around each class. Discrete attributes have point intervals. Class counts
 * are recorded for each interval on each feature. Classification is by
 * voting. Missing values are ignored. Does not handle numeric class. <p>
 *
 * Have added a simple attribute weighting scheme. Higher weight is assigned
 * to more confident intervals, where confidence is a function of entropy:
 * weight (att_i) = (entropy of class distrib att_i / max uncertainty)^-bias.
 * <p>
 *
 * Faster than NaiveBayes but slower than HyperPipes. <p><p>
 *
 * <pre>
 *  Confidence: 0.01 (two tailed)
 *
 * Dataset                   (1) VFI '-B  | (2) Hyper (3) Naive
 *                         ------------------------------------
 * anneal.ORIG               (10)   74.56 |   97.88 v   74.77
 * anneal                    (10)   71.83 |   97.88 v   86.51 v
 * audiology                 (10)   51.69 |   66.26 v   72.25 v
 * autos                     (10)   57.63 |   62.79 v   57.76
 * balance-scale             (10)   68.72 |   46.08 *   90.5  v
 * breast-cancer             (10)   67.25 |   69.84 v   73.12 v
 * wisconsin-breast-cancer   (10)   95.72 |   88.31 *   96.05 v
 * horse-colic.ORIG          (10)   66.13 |   70.41 v   66.12
 * horse-colic               (10)   78.36 |   62.07 *   78.28
 * credit-rating             (10)   85.17 |   44.58 *   77.84 *
 * german_credit             (10)   70.81 |   69.89 *   74.98 v
 * pima_diabetes             (10)   62.13 |   65.47 v   75.73 v
 * Glass                     (10)   56.82 |   50.19 *   47.43 *
 * cleveland-14-heart-diseas (10)   80.01 |   55.18 *   83.83 v
 * hungarian-14-heart-diseas (10)   82.8  |   65.55 *   84.37 v
 * heart-statlog             (10)   79.37 |   55.56 *   84.37 v
 * hepatitis                 (10)   83.78 |   63.73 *   83.87
 * hypothyroid               (10)   92.64 |   93.33 v   95.29 v
 * ionosphere                (10)   94.16 |   35.9  *   82.6  *
 * iris                      (10)   96.2  |   91.47 *   95.27 *
 * kr-vs-kp                  (10)   88.22 |   54.1  *   87.84 *
 * labor                     (10)   86.73 |   87.67     93.93 v
 * lymphography              (10)   78.48 |   58.18 *   83.24 v
 * mushroom                  (10)   99.85 |   99.77 *   95.77 *
 * primary-tumor             (10)   29    |   24.78 *   49.35 v
 * segment                   (10)   77.42 |   75.15 *   80.1  v
 * sick                      (10)   65.92 |   93.85 v   92.71 v
 * sonar                     (10)   58.02 |   57.17     67.97 v
 * soybean                   (10)   86.81 |   86.12 *   92.9  v
 * splice                    (10)   88.61 |   41.97 *   95.41 v
 * vehicle                   (10)   52.94 |   32.77 *   44.8  *
 * vote                      (10)   91.5  |   61.38 *   90.19 *
 * vowel                     (10)   57.56 |   36.34 *   62.81 v
 * waveform                  (10)   56.33 |   46.11 *   80.02 v
 * zoo                       (10)   94.05 |   94.26     95.04 v
 *                          ------------------------------------
 *                                (v| |*) |  (9|3|23)  (22|5|8) 
 * </pre> 					
 * <p>
 *
 * For more information, see <p>
 * 
 * Demiroz, G. and Guvenir, A. (1997) "Classification by voting feature 
 * intervals", <i>ECML-97</i>. <p>
 *  
 * Valid options are:<p>
 *
 * -C <br>
 * Don't weight voting intervals by confidence. <p>
 *
 * -B <bias> <br>
 * Set exponential bias towards confident intervals. default = 0.6 <p>
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision$
 */
public class VFI extends Classifier 
  implements OptionHandler, WeightedInstancesHandler {

  /** The index of the class attribute */
  protected int m_ClassIndex;

  /** The number of classes */
  protected int m_NumClasses;

  /** The training data */
  protected Instances m_Instances = null;

  /** The class counts for each interval of each attribute.
      Presumably indexed [attribute][interval][class] — confirm in
      buildClassifier (not visible in this chunk). */
  protected double [][][] m_counts;

  /** The global class counts */
  protected double [] m_globalCounts;

  /** The lower bounds for each attribute */
  protected double [][] m_intervalBounds;

  /** The maximum entropy for the class */
  protected double m_maxEntrop;

  /** Exponentially bias more confident intervals */
  protected boolean m_weightByConfidence = true;

  /** Bias towards more confident intervals. NOTE: stored negated —
      setBias(b) stores -b and getBias() returns -m_bias, so the
      effective user-visible default is 0.6, not the 1.0 quoted in
      some of the option help text. */
  protected double m_bias = -0.6;

  /* Small epsilon — presumably guards against log(0)/division by zero
     in the entropy computation; confirm in buildClassifier. */
  private double TINY = 0.1e-10;

  /**
   * Returns a string describing this search method
   * @return a description of the search method suitable for
   * displaying in the explorer/experimenter gui
   */
  /**
   * Returns a string describing this classifier for display in the
   * explorer/experimenter gui.
   *
   * @return a description of the classifier, including the attribute
   * weighting scheme and the literature reference
   */
  public String globalInfo() {
    StringBuffer description = new StringBuffer();

    description.append("Classification by voting feature intervals. ");
    description.append("Intervals are constucted around each class for each ");
    description.append("attribute (basically discretization). Class counts ");
    description.append("are recorded for each interval on each attribute. ");
    description.append("Classification is by voting. For more info see ");
    description.append("Demiroz, G. and Guvenir, A. (1997) \"Classification ");
    description.append("by voting feature intervals\", ECML-97.\n\n");
    description.append("Have added a simple attribute weighting scheme. ");
    description.append("Higher weight is assigned to more confident ");
    description.append("intervals, where confidence is a function of ");
    description.append("entropy:\nweight (att_i) = (entropy of class distrib ");
    description.append("att_i / max uncertainty)^-bias");

    return description.toString();
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
  public Enumeration listOptions() {

    Vector newVector = new Vector(2);

    newVector.addElement(
    new Option("\tDon't weight voting intervals by confidence",
	       "C", 0,"-C"));
    newVector.addElement(
    // Fixed: the help text previously claimed "default = 1.0", but the
    // actual default is 0.6 (m_bias is initialised to -0.6 and getBias()
    // returns the negation).
    new Option("\tSet exponential bias towards confident intervals\n"
	       +"\t(default = 0.6)",
	       "B", 1,"-B <bias>"));

    return newVector.elements();
  }

  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -C <br>
   * Don't weight voting intervals by confidence. <p>
   *
   * -B <bias> <br>
   * Set exponential bias towards confident intervals. default = 1.0 <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  /**
   * Parses a given list of options. Valid options are:<p>
   *
   * -C <br>
   * Don't weight voting intervals by confidence. <p>
   *
   * -B <bias> <br>
   * Set exponential bias towards confident intervals. default = 0.6 <p>
   *
   * @param options the list of options as an array of strings
   * @exception Exception if an option is not supported
   */
  public void setOptions(String[] options) throws Exception {

    // Presence of -C *disables* confidence weighting, hence the negation.
    setWeightByConfidence(!Utils.getFlag('C', options));

    String optionString = Utils.getOption('B', options);
    if (optionString.length() != 0) {
      // Parse directly rather than boxing through the deprecated
      // new Double(String) constructor; throws the same
      // NumberFormatException on malformed input.
      setBias(Double.parseDouble(optionString));
    }

    Utils.checkForRemainingOptions(options);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  /**
   * Returns the tip text for the weightByConfidence property,
   * shown in the explorer/experimenter gui.
   *
   * @return the tip text for this property
   */
  public String weightByConfidenceTipText() {
    return "Weight feature intervals by confidence";
  }

  /**
   * Set weighting by confidence
   * @param c true if feature intervals are to be weighted by confidence
   */
  /**
   * Sets whether feature intervals are weighted by confidence.
   *
   * @param c true to weight intervals by confidence
   */
  public void setWeightByConfidence(boolean c) {
    m_weightByConfidence = c;
  }

  /**
   * Get whether feature intervals are being weighted by confidence
   * @return true if weighting by confidence is selected
   */
  /**
   * Gets whether feature intervals are weighted by confidence.
   *
   * @return true if intervals are weighted by confidence
   */
  public boolean getWeightByConfidence() {
    return m_weightByConfidence;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  /**
   * Returns the tip text for the bias property, shown in the
   * explorer/experimenter gui.
   *
   * @return the tip text for this property
   */
  public String biasTipText() {
    return "Strength of bias towards more confident features";
  }

  /**
   * Set the value of the exponential bias towards more confident intervals
   * @param b the value of the bias parameter
   */
  /**
   * Sets the exponential bias towards more confident intervals.
   * The value is stored negated internally (it is used as a negative
   * exponent); getBias() negates it back, so the round trip is lossless.
   *
   * @param b the bias parameter as entered by the user
   */
  public void setBias(double b) {
    m_bias = -b;
  }

  /**
   * Get the value of the bias parameter
   * @return the bias parameter
   */
  /**
   * Gets the exponential bias towards more confident intervals.
   * Undoes the negation applied by setBias, returning the
   * user-visible value.
   *
   * @return the bias parameter
   */
  public double getBias() {
    return -m_bias;
  }

  /**
   * Gets the current settings of VFI
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  /**
   * Gets the current settings of VFI as a command-line option array.
   * Unused slots are padded with empty strings so the array length
   * is constant.
   *
   * @return an array of strings suitable for passing to setOptions()
   */
  public String[] getOptions () {
    String[] result = new String[3];
    int pos = 0;

    // -C is emitted only when confidence weighting is turned OFF.
    if (!getWeightByConfidence()) {
      result[pos++] = "-C";
    }

    result[pos++] = "-B";
    result[pos++] = "" + getBias();

    // Pad remaining slots with empty strings.
    for (int i = pos; i < result.length; i++) {
      result[i] = "";
    }

    return result;
  }
  

  /**
   * Generates the classifier.
   *

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -