aodesr.java

来自「Weka」· Java 代码 · 共 916 行 · 第 1/2 页

JAVA
916
字号
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    AODEsr.java *    Copyright (C) 2007 *    Algorithm developed by: Fei ZHENG and Geoff Webb *    Code written by: Fei ZHENG and Janice Boughton */package weka.classifiers.bayes;import weka.classifiers.Classifier;import weka.classifiers.UpdateableClassifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Vector;/** * <!-- globalinfo-start --> * AODEsr augments AODE with Subsumption Resolution. * AODEsr detects specializations between two attribute values at * classification time and deletes the generalization attribute value. * <br/> * For more information, see<br/> * <br/> * Zheng, F., Webb, G.I. (2006): Efficient lazy elimination for * averaged-one dependence * estimators. In: Proc. 23th Int. Conf. Machine Learning (ICML 2006), * 1113-1120 * <br/> * Note: the subsumption resolution technique is called lazy elimination * in the ICML paper. 
* <br/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;INPROCEEDINGS{ZhengWebbICML2006, *    AUTHOR = {Fei Zheng and Geoffrey I. Webb}, *    TITLE = {Efficient Lazy Elimination for Averaged-One Dependence *             Estimators}, *    BOOKTITLE = {Proceedings of the Twenty-third International *                 Conference on Machine  Learning (ICML 2006)}, *    ISBN = {1-59593-383-2}, *    PAGES = {1113--1120}, *    PUBLISHER = {ACM Press}, *    YEAR = {2006},  } * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are:<p/> * * <pre> -D *  Output debugging information * </pre> *  * <pre> -F &lt;int&gt; *  Impose a frequency limit for superParents *  (default is 1)</pre> * * <pre> -L *  Use Laplace estimation *  (default is m-estimation)</pre> * * <pre> -M &lt;double&gt; *  Specify the m value of m-estimation *  (default is 1)</pre> * * <pre>-C &lt;int&gt; *  Specify critical value for specialization-generalization. *  (default is 50). *  Larger values than the default of 50 substantially reduce *  the risk of incorrectly inferring that one value subsumes *  another, but also reduces the number of true subsumptions *  that are detected.</pre> * <!-- options-end --> * * @author Fei Zheng * @author Janice Boughton * @version $Revision: 1.2 $ */public class AODEsr extends Classifier    implements OptionHandler, WeightedInstancesHandler, UpdateableClassifier,               TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 5602143019183068848L;  /**   * 3D array (m_NumClasses * m_TotalAttValues * m_TotalAttValues)   * of attribute counts, i.e. the number of times an attribute value occurs   * in conjunction with another attribute value and a class value.     */  private double [][][] m_CondiCounts;   /**   * 2D array (m_TotalAttValues * m_TotalAttValues) of attributes counts.   * similar to m_CondiCounts, but ignoring class value.   
*/
  private double [][] m_CondiCountsNoClass;

  /**
   * The number of times each class value occurs in the dataset, accumulated
   * as the sum of the weights of the instances carrying that class value.
   */
  private double [] m_ClassCounts;

  /**
   * The sums of attribute-class counts, indexed [classVal][att].
   * Only incremented when the attribute's value is not missing, so
   *    -- if there are no missing values for att, then
   *       m_SumForCounts[classVal][att] will be the same as
   *       m_ClassCounts[classVal]
   */
  private double [][] m_SumForCounts;

  /** The number of classes */
  private int m_NumClasses;

  /** The number of attributes in dataset, including class */
  private int m_NumAttributes;

  /**
   * The number of instances in the dataset, taken in buildClassifier after
   * instances with a missing class have been removed.
   */
  private int m_NumInstances;

  /** The index of the class attribute */
  private int m_ClassIndex;

  /**
   * The dataset. buildClassifier replaces it with
   * new Instances(m_Instances, 0) once the counts have been computed,
   * to free up space.
   */
  private Instances m_Instances;

  /**
   * The total number of values (including an extra for each attribute's
   * missing value, which are included in m_CondiCounts) for all attributes
   * (not including class).  Eg. for three atts each with two possible values,
   * m_TotalAttValues would be 9 (6 values + 3 missing).
   * This variable is used when allocating space for m_CondiCounts matrix.
   */
  private int m_TotalAttValues;

  /**
   * The starting index (in the m_CondiCounts matrix) of the values for each
   * attribute. The entry for the class attribute is never assigned.
   */
  private int [] m_StartAttIndex;

  /**
   * The number of values for each attribute; the entry for the class
   * attribute holds m_NumClasses.
   */
  private int [] m_NumAttValues;

  /**
   * The frequency (sum of instance weights) of each attribute value for the
   * dataset, indexed like m_CondiCounts, i.e. by m_StartAttIndex[att] + value,
   * with the slot after an attribute's last value counting its missing values.
   */
  private double [] m_Frequencies;

  /** The number of valid class values observed in dataset, accumulated as
   *  the sum of the weights of the instances whose class is not missing
   *  -- with unit weights and no missing classes, this number is the same
   *  as m_NumInstances.
*/  private double m_SumInstances;  /** An att's frequency must be this value or more to be a superParent */  private int m_Limit = 1;  /** If true, outputs debugging info */  private boolean m_Debug = false;    /** m value for m-estimation */  protected  double m_MWeight = 1.0;    /** Using LapLace estimation or not*/  private boolean m_Laplace = false;    /** the critical value for the specialization-generalization */  private int m_Critical = 50;   /**   * Returns a string describing this classifier   * @return a description of the classifier suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return "AODEsr augments AODE with Subsumption Resolution."      +"AODEsr detects specializations between two attribute "      +"values at classification time and deletes the generalization "      +"attribute value.\n"      +"For more information, see:\n"      +"Zheng, F., Webb, G.I. (2006): Efficient lazy elimination for "      +"averaged-one dependence "      +"estimators. In: Proc. 23th Int. Conf. Machine Learning (ICML 2006), "      +"1113-1120";  }   /**   * Returns an instance of a TechnicalInformation object, containing   * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   *   * @return the technical information about this class   */  public TechnicalInformation getTechnicalInformation() {    TechnicalInformation        result;    result = new TechnicalInformation(Type.INPROCEEDINGS);    result.setValue(Field.AUTHOR, "Fei Zheng and Geoffrey I. 
Webb");    result.setValue(Field.YEAR, "2006");    result.setValue(Field.TITLE, "Efficient Lazy Elimination for Averaged-One Dependence Estimators");    result.setValue(Field.PAGES, "1113-1120");    result.setValue(Field.BOOKTITLE, "Proceedings of the Twenty-third International Conference on Machine  Learning (ICML 2006)");    result.setValue(Field.PUBLISHER, "ACM Press");    result.setValue(Field.ISBN, "1-59593-383-2");    return result;  } /**  * Returns default capabilities of the classifier.  *  * @return      the capabilities of this classifier  */  public Capabilities getCapabilities() {    Capabilities result = super.getCapabilities();    // attributes    result.enable(Capability.NOMINAL_ATTRIBUTES);    result.enable(Capability.MISSING_VALUES);    // class    result.enable(Capability.NOMINAL_CLASS);    result.enable(Capability.MISSING_CLASS_VALUES);    // instances    result.setMinimumNumberInstances(0);    return result;  }  /**   * Generates the classifier.   *   * @param instances set of instances serving as training data   * @throws Exception if the classifier has not been generated   * successfully   */  public void buildClassifier(Instances instances) throws Exception {    // can classifier handle the data?    
getCapabilities().testWithFail(instances);    // remove instances with missing class    m_Instances = new Instances(instances);    m_Instances.deleteWithMissingClass();    // reset variable for this fold    m_SumInstances = 0;    m_ClassIndex = instances.classIndex();    m_NumInstances = m_Instances.numInstances();    m_NumAttributes = instances.numAttributes();    m_NumClasses = instances.numClasses();    // allocate space for attribute reference arrays    m_StartAttIndex = new int[m_NumAttributes];    m_NumAttValues = new int[m_NumAttributes];     m_TotalAttValues = 0;    for(int i = 0; i < m_NumAttributes; i++) {       if(i != m_ClassIndex) {          m_StartAttIndex[i] = m_TotalAttValues;          m_NumAttValues[i] = m_Instances.attribute(i).numValues();          m_TotalAttValues += m_NumAttValues[i] + 1;          // + 1 so room for missing value count       } else {          // m_StartAttIndex[i] = -1;  // class isn't included          m_NumAttValues[i] = m_NumClasses;       }    }    // allocate space for counts and frequencies    m_CondiCounts = new double[m_NumClasses][m_TotalAttValues][m_TotalAttValues];    m_ClassCounts = new double[m_NumClasses];    m_SumForCounts = new double[m_NumClasses][m_NumAttributes];    m_Frequencies = new double[m_TotalAttValues];    m_CondiCountsNoClass = new double[m_TotalAttValues][m_TotalAttValues];        // calculate the counts    for(int k = 0; k < m_NumInstances; k++) {       addToCounts((Instance)m_Instances.instance(k));    }    // free up some space    m_Instances = new Instances(m_Instances, 0);  }   /**   * Updates the classifier with the given instance.   *   * @param instance the new training instance to include in the model    * @throws Exception if the instance could not be incorporated in   * the model.   */  public void updateClassifier(Instance instance) {    this.addToCounts(instance);  }  /**   * Puts an instance's values into m_CondiCounts, m_ClassCounts and    * m_SumInstances.   
*   * @param instance the instance whose values are to be put into the    *                 counts variables   */  private void addToCounts(Instance instance) {     double [] countsPointer;    double [] countsNoClassPointer;     if(instance.classIsMissing())       return;   // ignore instances with missing class    int classVal = (int)instance.classValue();    double weight = instance.weight();     m_ClassCounts[classVal] += weight;    m_SumInstances += weight;       // store instance's att val indexes in an array, b/c accessing it     // in loop(s) is more efficient    int [] attIndex = new int[m_NumAttributes];    for(int i = 0; i < m_NumAttributes; i++) {       if(i == m_ClassIndex)          attIndex[i] = -1;  // we don't use the class attribute in counts       else {          if(instance.isMissing(i))             attIndex[i] = m_StartAttIndex[i] + m_NumAttValues[i];          else             attIndex[i] = m_StartAttIndex[i] + (int)instance.value(i);       }    }    for(int Att1 = 0; Att1 < m_NumAttributes; Att1++) {       if(attIndex[Att1] == -1)          continue;   // avoid pointless looping as Att1 is currently the class attribute       m_Frequencies[attIndex[Att1]] += weight;              // if this is a missing value, we don't want to increase sumforcounts       if(!instance.isMissing(Att1))          m_SumForCounts[classVal][Att1] += weight;       // save time by referencing this now, rather than repeatedly in the loop       countsPointer = m_CondiCounts[classVal][attIndex[Att1]];       countsNoClassPointer = m_CondiCountsNoClass[attIndex[Att1]];       for(int Att2 = 0; Att2 < m_NumAttributes; Att2++) {          if(attIndex[Att2] != -1) {             countsPointer[attIndex[Att2]] += weight;             countsNoClassPointer[attIndex[Att2]] += weight;          }       }    }  }    /**   * Calculates the class membership probabilities for the given test   * instance.   
*   * @param instance the instance to be classified   * @return predicted class probability distribution   * @throws Exception if there is a problem generating the prediction   */  public double [] distributionForInstance(Instance instance) throws Exception {    // accumulates posterior probabilities for each class    double [] probs = new double[m_NumClasses];    // index for parent attribute value, and a count of parents used    int pIndex, parentCount;     int [] SpecialGeneralArray = new int[m_NumAttributes];        // pointers for efficiency    double [][] countsForClass;    double [] countsForClassParent;    double [] countsForAtti;    double [] countsForAttj;    // store instance's att values in an int array, so accessing them     // is more efficient in loop(s).    int [] attIndex = new int[m_NumAttributes];    for(int att = 0; att < m_NumAttributes; att++) {       if(instance.isMissing(att) || att == m_ClassIndex)          attIndex[att] = -1; // can't use class & missing vals in calculations       else          attIndex[att] = m_StartAttIndex[att] + (int)instance.value(att);    }    // -1 indicates attribute is not a generalization of any other attributes    for(int i = 0; i < m_NumAttributes; i++) {       SpecialGeneralArray[i] = -1;    }    // calculate the specialization-generalization array    for(int i = 0; i < m_NumAttributes; i++){       // skip i if it's the class or is missing       if(attIndex[i] == -1)  continue;       countsForAtti = m_CondiCountsNoClass[attIndex[i]];        for(int j = 0; j < m_NumAttributes; j++) {          // skip j if it's the class, missing, is i or a generalization of i          if((attIndex[j] == -1) || (i == j) || (SpecialGeneralArray[j] == i))            continue;                   countsForAttj = m_CondiCountsNoClass[attIndex[j]];          // check j's frequency is above critical value          if(countsForAttj[attIndex[j]] > m_Critical) {             // skip j if the frequency of i and j together is not equivalent	   
  // to the frequency of j alone             if(countsForAttj[attIndex[j]] == countsForAtti[attIndex[j]]) {             // if attributes i and j are both a specialization of each other             // avoid deleting both by skipping j                if((countsForAttj[attIndex[j]] == countsForAtti[attIndex[i]])                 && (i < j)){                  continue;                } else {                    // set the specialization relationship                    SpecialGeneralArray[i] = j;                    break; // break out of j loop because a specialization has been found                }             }          }       }    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?