aode.java

来自「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」· Java 代码 · 共 772 行 · 第 1/2 页
JAVA
772 行
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    AODE.java *    Copyright (C) 2003 *    Algorithm developed by: Geoff Webb *    Code written by: Janice Boughton & Zhihai Wang */package weka.classifiers.bayes;import weka.classifiers.Classifier;import weka.classifiers.UpdateableClassifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * AODE achieves highly accurate classification by averaging over all of a small space of alternative naive-Bayes-like models that have weaker (and hence less detrimental) independence assumptions than naive Bayes. The resulting algorithm is computationally efficient while delivering highly accurate classification on many learning  tasks.<br/> * <br/> * For more information, see<br/> * <br/> * G. Webb, J. Boughton, Z. Wang (2005). Not So Naive Bayes: Aggregating One-Dependence Estimators. Machine Learning. 
58(1):5-24.<br/> * <br/> * Further papers are available at<br/> *   http://www.csse.monash.edu.au/~webb/.<br/> * <br/> * Can use an m-estimate for smoothing base probability estimates in place of the Laplace correction (via option -M).<br/> * Default frequency limit set to 1. * <p/> <!-- globalinfo-end --> *  <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;article{Webb2005, *    author = {G. Webb and J. Boughton and Z. Wang}, *    journal = {Machine Learning}, *    number = {1}, *    pages = {5-24}, *    title = {Not So Naive Bayes: Aggregating One-Dependence Estimators}, *    volume = {58}, *    year = {2005} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  Output debugging information * </pre> *  * <pre> -F &lt;int&gt; *  Impose a frequency limit for superParents *  (default is 1)</pre> *  * <pre> -M *  Use m-estimate instead of laplace correction * </pre> *  * <pre> -W &lt;int&gt; *  Specify a weight to use with m-estimate *  (default is 1)</pre> *  <!-- options-end --> * * @author Janice Boughton (jrbought@csse.monash.edu.au) * @author Zhihai Wang (zhw@csse.monash.edu.au) * @version $Revision: 1.16 $ */public class AODE extends Classifier    implements OptionHandler, WeightedInstancesHandler, UpdateableClassifier,                TechnicalInformationHandler {      /** for serialization */  static final long serialVersionUID = 9197439980415113523L;    /**   * 3D array (m_NumClasses * m_TotalAttValues * m_TotalAttValues)   * of attribute counts, i.e., the number of times an attribute value occurs   * in conjunction with another attribute value and a class value.     
*/  private double [][][] m_CondiCounts;      /** The number of times each class value occurs in the dataset */  private double [] m_ClassCounts;      /** The sums of attribute-class counts     *    -- if there are no missing values for att, then   *       m_SumForCounts[classVal][att] will be the same as   *       m_ClassCounts[classVal]    */  private double [][] m_SumForCounts;  /** The number of classes */  private int m_NumClasses;      /** The number of attributes in dataset, including class */  private int m_NumAttributes;      /** The number of instances in the dataset */  private int m_NumInstances;      /** The index of the class attribute */  private int m_ClassIndex;      /** The dataset */  private Instances m_Instances;      /**   * The total number of values (including an extra for each attribute's    * missing value, which are included in m_CondiCounts) for all attributes    * (not including class). E.g., for three atts each with two possible values,   * m_TotalAttValues would be 9 (6 values + 3 missing).   * This variable is used when allocating space for m_CondiCounts matrix.   */  private int m_TotalAttValues;      /** The starting index (in the m_CondiCounts matrix) of the values for each   * attribute */  private int [] m_StartAttIndex;      /** The number of values for each attribute */  private int [] m_NumAttValues;      /** The frequency of each attribute value for the dataset */  private double [] m_Frequencies;  /** The number of valid class values observed in dataset    *  -- with no missing classes, this number is the same as m_NumInstances.   
*/  private double m_SumInstances;  /** An att's frequency must be this value or more to be a superParent */  private int m_Limit = 1;  /** If true, outputs debugging info */  private boolean m_Debug = false;  /** flag for using m-estimates */  private boolean m_MEstimates = false;  /** value for m in m-estimate */  private int m_Weight = 1;   /**   * Returns a string describing this classifier   * @return a description of the classifier suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "AODE achieves highly accurate classification by averaging over "      +"all of a small space of alternative naive-Bayes-like models that have "      +"weaker (and hence less detrimental) independence assumptions than "      +"naive Bayes. The resulting algorithm is computationally efficient "      +"while delivering highly accurate classification on many learning  "      +"tasks.\n\n"      +"For more information, see\n\n"      + getTechnicalInformation().toString() + "\n\n"      +"Further papers are available at\n"      +"  http://www.csse.monash.edu.au/~webb/.\n\n"      + "Can use an m-estimate for smoothing base probability estimates "      + "in place of the Laplace correction (via option -M).\n"      + "Default frequency limit set to 1.";  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *    * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation 	result;        result = new TechnicalInformation(Type.ARTICLE);    result.setValue(Field.AUTHOR, "G. Webb and J. Boughton and Z. 
Wang");    result.setValue(Field.YEAR, "2005");    result.setValue(Field.TITLE, "Not So Naive Bayes: Aggregating One-Dependence Estimators");    result.setValue(Field.JOURNAL, "Machine Learning");    result.setValue(Field.VOLUME, "58");    result.setValue(Field.NUMBER, "1");    result.setValue(Field.PAGES, "5-24");    return result;  }  /**   * Returns default capabilities of the classifier.   *   * @return      the capabilities of this classifier   */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);        return result;  }   /**   * Generates the classifier.   *   * @param instances set of instances serving as training data   * @throws Exception if the classifier has not been generated   * successfully   */  public void buildClassifier(Instances instances) throws Exception {    // can classifier handle the data?    
getCapabilities().testWithFail(instances);    // remove instances with missing class    m_Instances = new Instances(instances);    m_Instances.deleteWithMissingClass();    // reset variable for this fold    m_SumInstances = 0;    m_ClassIndex = instances.classIndex();    m_NumInstances = m_Instances.numInstances();    m_NumAttributes = m_Instances.numAttributes();    m_NumClasses = m_Instances.numClasses();     // allocate space for attribute reference arrays    m_StartAttIndex = new int[m_NumAttributes];    m_NumAttValues = new int[m_NumAttributes];     m_TotalAttValues = 0;    for(int i = 0; i < m_NumAttributes; i++) {       if(i != m_ClassIndex) {          m_StartAttIndex[i] = m_TotalAttValues;          m_NumAttValues[i] = m_Instances.attribute(i).numValues();          m_TotalAttValues += m_NumAttValues[i] + 1;          // + 1 so room for missing value count       } else {          // m_StartAttIndex[i] = -1;  // class isn't included	  m_NumAttValues[i] = m_NumClasses;       }    }    // allocate space for counts and frequencies    m_CondiCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];    m_ClassCounts = new double[m_NumClasses];    m_SumForCounts = new double[m_NumClasses][m_NumAttributes];    m_Frequencies = new double[m_TotalAttValues];    // calculate the counts    for(int k = 0; k < m_NumInstances; k++) {       addToCounts((Instance)m_Instances.instance(k));    }    // free up some space    m_Instances = new Instances(m_Instances, 0);  }   /**   * Updates the classifier with the given instance.   *   * @param instance the new training instance to include in the model    */    public void updateClassifier(Instance instance) {	this.addToCounts(instance);    }    /**      * Puts an instance's values into m_CondiCounts, m_ClassCounts and      * m_SumInstances.     
*     * @param instance  the instance whose values are to be put into the counts     *                  variables     */  private void addToCounts(Instance instance) {     double [] countsPointer;     if(instance.classIsMissing())       return;   // ignore instances with missing class    int classVal = (int)instance.classValue();    int weight = (int)instance.weight();     m_ClassCounts[classVal] += weight;    m_SumInstances += weight;       // store instance's att val indexes in an array, b/c accessing it     // in loop(s) is more efficient    int [] attIndex = new int[m_NumAttributes];    for(int i = 0; i < m_NumAttributes; i++) {       if(i == m_ClassIndex)          attIndex[i] = -1;  // we don't use the class attribute in counts       else {          if(instance.isMissing(i))             attIndex[i] = m_StartAttIndex[i] + m_NumAttValues[i];          else             attIndex[i] = m_StartAttIndex[i] + (int)instance.value(i);       }    }    for(int Att1 = 0; Att1 < m_NumAttributes; Att1++) {       if(attIndex[Att1] == -1)          continue;   // avoid pointless looping as Att1 is currently the class attribute       m_Frequencies[attIndex[Att1]] += weight;              // if this is a missing value, we don't want to increase sumforcounts       if(!instance.isMissing(Att1))          m_SumForCounts[classVal][Att1] += weight;       // save time by referencing this now, rather than do it repeatedly in the loop       countsPointer = m_CondiCounts[classVal][attIndex[Att1]];       for(int Att2 = 0; Att2 < m_NumAttributes; Att2++) {          if(attIndex[Att2] != -1) {             countsPointer[attIndex[Att2]] += weight;          }       }    }  }    /**   * Calculates the class membership probabilities for the given test   * instance.   
*   * @param instance the instance to be classified   * @return predicted class probability distribution   * @throws Exception if there is a problem generating the prediction   */  public double [] distributionForInstance(Instance instance) throws Exception {     // accumulates posterior probabilities for each class    double [] probs = new double[m_NumClasses];    // index for parent attribute value, and a count of parents used    int pIndex, parentCount;     // pointers for efficiency    // for current class, point to joint frequency for any pair of att values    double [][] countsForClass;    // for current class & parent, point to joint frequency for any att value    double [] countsForClassParent;    // store instance's att indexes in an int array, so accessing them
aode.java - 源码说明

本页面展示了「代码是一个分类器的实现,其中使用了部分weka的源代码。可以将项目导入eclipse运行」中的 aode.java 源码文件，采用 Java 编程语言编写，共 772 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与eclipse相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?