⭐ 虫虫下载站

📄 minnd.java

📁 An implementation of a classifier that reuses part of the Weka source code. The project can be imported into Eclipse and run.
💻 JAVA
📖 Page 1 / 3
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * MINND.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.classifiers.mi;

import weka.classifiers.Classifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * Multiple-Instance Nearest Neighbour with Distribution learner.<br/>
 * <br/>
 * It uses gradient descent to find the weight for each dimension of each
 * exemplar from the starting point of 1.0. In order to avoid overfitting, it
 * uses the mean-square function (i.e. the Euclidean distance) to search for
 * the weights.<br/>
 * It then uses the weights to cleanse the training data. After that it
 * searches for the weights again, starting from the weights found before.<br/>
 * Finally it uses the most recently updated weights to cleanse the test
 * exemplar and then finds the nearest neighbour of the test exemplar using a
 * partly-weighted Kullback distance. However, the variances in the Kullback
 * distance are the ones from before cleansing.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Xin Xu (2001). A nearest distribution approach to multiple-instance learning. Hamilton, NZ.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;misc{Xu2001,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.591B},
 *    school = {University of Waikato},
 *    title = {A nearest distribution approach to multiple-instance learning},
 *    year = {2001}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -K &lt;number of neighbours&gt;
 *  Set number of nearest neighbours for prediction
 *  (default 1)</pre>
 *
 * <pre> -S &lt;number of neighbours&gt;
 *  Set number of nearest neighbours for cleansing the training data
 *  (default 1)</pre>
 *
 * <pre> -E &lt;number of neighbours&gt;
 *  Set number of nearest neighbours for cleansing the testing data
 *  (default 1)</pre>
 *
 <!-- options-end -->
 *
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.4 $
 */
public class MINND
  extends Classifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = -4512599203273864994L;

  /** The number of nearest neighbours for prediction */
  protected int m_Neighbour = 1;

  /** The mean for each attribute of each exemplar */
  protected double[][] m_Mean = null;

  /** The variance for each attribute of each exemplar */
  protected double[][] m_Variance = null;

  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** header info of the data */
  protected Instances m_Attributes;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 0;

  /** The weight of each exemplar */
  protected double[] m_Weights = null;

  /** The very small number representing zero */
  static private double m_ZERO = 1.0e-45;

  /** The learning rate in the gradient descent */
  protected double m_Rate = -1;

  /** The minimum values for numeric attributes. */
  private double[] m_MinArray = null;

  /** The maximum values for numeric attributes. */
  private double[] m_MaxArray = null;

  /** The stopping criterion of the gradient descent */
  private double m_STOP = 1.0e-45;

  /** The weights that alter the dimension of each exemplar */
  private double[][] m_Change = null;

  /** The noise data of each exemplar */
  private double[][] m_NoiseM = null, m_NoiseV = null, m_ValidM = null,
          m_ValidV = null;

  /** The number of nearest neighbour instances in the selection of noises
      in the training data */
  private int m_Select = 1;

  /** The number of nearest neighbour exemplars in the selection of noises
      in the test data */
  private int m_Choose = 1;

  /** The decay rate of the learning rate */
  private double m_Decay = 0.5;

  /**
   * Returns a string describing this classifier
   *
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Multiple-Instance Nearest Neighbour with Distribution learner.\n\n"
      + "It uses gradient descent to find the weight for each dimension of "
      + "each exemplar from the starting point of 1.0. In order to avoid "
      + "overfitting, it uses the mean-square function (i.e. the Euclidean "
      + "distance) to search for the weights.\n "
      + "It then uses the weights to cleanse the training data. After that "
      + "it searches for the weights again, starting from the weights "
      + "found before.\n "
      + "Finally it uses the most recently updated weights to cleanse the test "
      + "exemplar and then finds the nearest neighbour of the test exemplar "
      + "using a partly-weighted Kullback distance. However, the variances in "
      + "the Kullback distance are the ones from before cleansing.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MISC);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2001");
    result.setValue(Field.TITLE, "A nearest distribution approach to multiple-instance learning");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.591B");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return            the capabilities of this object
   * @see               Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * As the normal Nearest Neighbour algorithm does, it's lazy and simply
   * records the exemplar information (i.e. mean and variance for each
   * dimension of each exemplar and their classes) when building the model.
   * There is actually no need to store the exemplars themselves.
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */
  public void buildClassifier(Instances exs) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    Instances newData = new Instances(exs);
    newData.deleteWithMissingClass();

    int numegs = newData.numInstances();
    m_Dimension = newData.attribute(1).relation().numAttributes();
    m_Attributes = newData.stringFreeStructure();
    m_Change = new double[numegs][m_Dimension];
    m_NumClasses = exs.numClasses();
    m_Mean = new double[numegs][m_Dimension];
    m_Variance = new double[numegs][m_Dimension];
    m_Class = new double[numegs];
    m_Weights = new double[numegs];
    m_NoiseM = new double[numegs][m_Dimension];
    m_NoiseV = new double[numegs][m_Dimension];
    m_ValidM = new double[numegs][m_Dimension];
    m_ValidV = new double[numegs][m_Dimension];
    m_MinArray = new double[m_Dimension];
    m_MaxArray = new double[m_Dimension];

    for (int v = 0; v < m_Dimension; v++)
      m_MinArray[v] = m_MaxArray[v] = Double.NaN;

    for (int w = 0; w < numegs; w++) {
      updateMinMax(newData.instance(w));
    }

    // Scale exemplars
    Instances data = m_Attributes;
    for (int x = 0; x < numegs; x++) {
      Instance example = newData.instance(x);
      example = scale(example);
      for (int i = 0; i < m_Dimension; i++) {
        m_Mean[x][i] = example.relationalValue(1).meanOrMode(i);
        m_Variance[x][i] = example.relationalValue(1).variance(i);
        if (Utils.eq(m_Variance[x][i], 0.0))
          m_Variance[x][i] = m_ZERO;
        m_Change[x][i] = 1.0;
      }

      data.add(example);
      m_Class[x] = example.classValue();
      m_Weights[x] = example.weight();
    }

    for (int z = 0; z < numegs; z++)
      findWeights(z, m_Mean);

    // Pre-process and record "true estimated" parameters for distributions
    for (int x = 0; x < numegs; x++) {
      Instance example = preprocess(data, x);
      if (getDebug())
        System.out.println("???Exemplar " + x + " has been pre-processed:"
            + data.instance(x).relationalValue(1).sumOfWeights()
            + "|" + example.relationalValue(1).sumOfWeights()
            + "; class:" + m_Class[x]);

      if (Utils.gr(example.relationalValue(1).sumOfWeights(), 0)) {
        for (int i = 0; i < m_Dimension; i++) {
          m_ValidM[x][i] = example.relationalValue(1).meanOrMode(i);
          m_ValidV[x][i] = example.relationalValue(1).variance(i);
          if (Utils.eq(m_ValidV[x][i], 0.0))
            m_ValidV[x][i] = m_ZERO;
        }
      }
      else {
        m_ValidM[x] = null;
        m_ValidV[x] = null;
      }
    }

    for (int z = 0; z < numegs; z++)
      if (m_ValidM[z] != null)
        findWeights(z, m_ValidM);
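The listing above covers only page 1; the rest of the class (the gradient-descent weight search findWeights, the cleansing step preprocess, and the distance functions) continues on pages 2-3. For readers who just want to try the classifier, the sketch below shows one plausible way to train and query MINND through Weka's standard API. It is a minimal example, not part of the listing: the file path mi-data.arff is hypothetical and stands in for any multi-instance ARFF file (bag-id attribute, relational attribute, class attribute), and the -K/-S/-E values are arbitrary.

import weka.classifiers.mi.MINND;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class MINNDDemo {
  public static void main(String[] args) throws Exception {
    // Hypothetical multi-instance dataset: bag id, relational attribute, class.
    Instances data = DataSource.read("mi-data.arff");
    data.setClassIndex(data.numAttributes() - 1);

    MINND minnd = new MINND();
    // -K: neighbours for prediction; -S/-E: neighbours for cleansing
    // the training/testing data (all default to 1).
    minnd.setOptions(new String[] {"-K", "3", "-S", "1", "-E", "1"});
    minnd.buildClassifier(data);

    // Classify the first bag (resubstitution, for illustration only).
    double pred = minnd.classifyInstance(data.instance(0));
    System.out.println("Predicted class: "
        + data.classAttribute().value((int) pred));
  }
}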

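The "partly-weighted Kullback distance" in the javadoc compares two bags through the per-dimension means and variances that buildClassifier records in m_Mean and m_Variance, i.e. each bag is summarized as a diagonal Gaussian. The exact weighting MINND applies is on the later pages of this listing; as a rough illustration of the underlying idea only, the sketch below computes the standard Kullback-Leibler divergence between two such diagonal-Gaussian summaries. The class name, the example values, and the symmetrisation are illustrative assumptions, not the classifier's own code.

/**
 * Illustrative only: KL divergence between two diagonal-Gaussian bag
 * summaries (mean/variance per dimension), the kind of quantity the
 * "Kullback distance" in MINND's javadoc is built from. This is NOT
 * the exact partly-weighted formula used by the classifier.
 */
public final class KullbackSketch {

  /** KL(P || Q) for diagonal Gaussians P = (mp, vp), Q = (mq, vq). */
  public static double klDivergence(double[] mp, double[] vp,
                                    double[] mq, double[] vq) {
    double sum = 0.0;
    for (int i = 0; i < mp.length; i++) {
      double diff = mp[i] - mq[i];
      // Standard closed form per dimension:
      // 0.5 * ( ln(vq/vp) + vp/vq + diff^2/vq - 1 )
      sum += 0.5 * (Math.log(vq[i] / vp[i])
                    + vp[i] / vq[i]
                    + diff * diff / vq[i]
                    - 1.0);
    }
    return sum;
  }

  public static void main(String[] args) {
    double[] mp = {0.2, 0.5}, vp = {0.10, 0.20};
    double[] mq = {0.3, 0.4}, vq = {0.15, 0.25};
    // Symmetrised variant, often used when a true distance (rather
    // than an asymmetric divergence) is wanted between two bags:
    double d = klDivergence(mp, vp, mq, vq) + klDivergence(mq, vq, mp, vp);
    System.out.println("Symmetrised Kullback distance: " + d);
  }
}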