// TLD.java — source listing (code-viewer page header removed)
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * TLD.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.classifiers.mi;import weka.classifiers.RandomizableClassifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.MultiInstanceCapabilitiesHandler;import weka.core.Optimization;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Two-Level Distribution approach, changes the starting value of the searching algorithm, supplement the cut-off modification and check missing values.<br/> * <br/> * For more information see:<br/> * <br/> * Xin Xu (2003). Statistical learning in multiple instance problem. Hamilton, NZ. 
 * <p/> <!-- globalinfo-end -->
 *
 * <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @mastersthesis{Xu2003,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.594},
 *    school = {University of Waikato},
 *    title = {Statistical learning in multiple instance problem},
 *    year = {2003}
 * }
 * </pre>
 * <p/> <!-- technical-bibtex-end -->
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C
 *  Set whether or not use empirical
 *  log-odds cut-off instead of 0</pre>
 *
 * <pre> -R <numOfRuns>
 *  Set the number of multiple runs
 *  needed for searching the MLE.</pre>
 *
 * <pre> -S <num>
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 * <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.5 $
 */
public class TLD
  extends RandomizableClassifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 6657315525171152210L;

  /** The mean for each attribute of each positive exemplar */
  protected double[][] m_MeanP = null;

  /** The variance for each attribute of each positive exemplar */
  protected double[][] m_VarianceP = null;

  /** The mean for each attribute of each negative exemplar */
  protected double[][] m_MeanN = null;

  /** The variance for each attribute of each negative exemplar */
  protected double[][] m_VarianceN = null;

  /** The effective sum of weights of each positive exemplar in each dimension */
  protected double[][] m_SumP = null;

  /** The effective sum of weights of each negative exemplar in each dimension */
  protected double[][] m_SumN = null;

  /** The parameters to be estimated for each positive exemplar */
  protected double[] m_ParamsP = null;

  /** The parameters to be estimated for each negative exemplar */
  protected double[] m_ParamsN = null;

  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 2;

  /** The very small number representing zero */
  static public double ZERO = 1.0e-6;

  /** The number of runs to perform */
  protected int m_Run = 1;

  /** Classification cut-off; presumably the empirical log-odds threshold
      selected when -C is set — TODO confirm against the rest of the class */
  protected double m_Cutoff;

  /** Whether to use an empirical log-odds cut-off instead of 0 (-C option) */
  protected boolean m_UseEmpiricalCutOff = false;

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Two-Level Distribution approach, changes the starting value of "
      + "the searching algorithm, supplement the cut-off modification and "
      + "check missing values.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MASTERSTHESIS);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.TITLE, "Statistical learning in multiple instance problem");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.594");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.BINARY_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return the capabilities of this object
   * @see Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Builds the classifier from the given training exemplars (bags).
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */
  public void buildClassifier(Instances exs)throws Exception{
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    exs = new Instances(exs);
    exs.deleteWithMissingClass();

    int numegs = exs.numInstances();
    // dimension = number of attributes inside the relational (bag) attribute;
    // NOTE(review): this statement continues on the next source line
    m_Dimension = exs.attribute(1).relation().
      numAttributes(); // completes: m_Dimension = exs.attribute(1).relation().numAttributes();

    // Partition the exemplars into positive (classValue == 1) and negative bags
    Instances pos = new Instances(exs, 0), neg = new Instances(exs, 0);
    for(int u=0; u<numegs; u++){
      Instance example = exs.instance(u);
      if(example.classValue() == 1)
        pos.add(example);
      else
        neg.add(example);
    }

    int pnum = pos.numInstances(),
        nnum = neg.numInstances();

    m_MeanP = new double[pnum][m_Dimension];
    m_VarianceP = new double[pnum][m_Dimension];
    m_SumP = new double[pnum][m_Dimension];
    m_MeanN = new double[nnum][m_Dimension];
    m_VarianceN = new double[nnum][m_Dimension];
    m_SumN = new double[nnum][m_Dimension];
    m_ParamsP = new double[4*m_Dimension];
    m_ParamsN = new double[4*m_Dimension];

    // Estimation of the parameters: as the start value for search
    double[] pSumVal=new double[m_Dimension], // for m
      nSumVal=new double[m_Dimension];
    double[] maxVarsP=new double[m_Dimension], // for a
      maxVarsN=new double[m_Dimension];
    // Mean of sample variances: for b, b=a/E(\sigma^2)+2
    double[] varMeanP = new double[m_Dimension],
      varMeanN = new double[m_Dimension];
    // Variances of sample means: for w, w=E[var(\mu)]/E[\sigma^2]
    double[] meanVarP = new double[m_Dimension],
      meanVarN = new double[m_Dimension];
    // number of exemplars without all values missing
    double[] numExsP = new double[m_Dimension],
      numExsN = new double[m_Dimension];

    // Extract metadata for both positive and negative bags
    for(int v=0; v < pnum; v++){
      // per-bag mean and variance of every attribute in the relational value
      Instances pxi = pos.instance(v).relationalValue(1);
      for (int k=0; k<pxi.numAttributes(); k++) {
        m_MeanP[v][k] = pxi.meanOrMode(k);
        m_VarianceP[v][k] = pxi.variance(k);
      }

      for (int w=0,t=0; w < m_Dimension; w++,t++){
        if(!Double.isNaN(m_MeanP[v][w])){
          // effective weight: sum of instance weights where the value is present
          for(int u=0;u<pxi.numInstances();u++){
            Instance ins = pxi.instance(u);
            if(!ins.isMissing(t))
              m_SumP[v][w] += ins.weight();
          }
          numExsP[w]++;
          pSumVal[w] += m_MeanP[v][w];
          meanVarP[w] += m_MeanP[v][w]*m_MeanP[v][w];
          if(maxVarsP[w] < m_VarianceP[v][w])
            maxVarsP[w] = m_VarianceP[v][w];
          varMeanP[w] += m_VarianceP[v][w];
          // turn the sample variance into a (weighted) sum of squared deviations
          m_VarianceP[v][w] *= (m_SumP[v][w]-1.0);
          if(m_VarianceP[v][w] < 0.0)
            m_VarianceP[v][w] = 0.0;
        }
      }
    }

    for(int v=0; v < nnum; v++){
      // per-bag mean and variance of every attribute in the relational value
      Instances nxi = neg.instance(v).relationalValue(1);
      for (int k=0; k<nxi.numAttributes(); k++) {
        m_MeanN[v][k] = nxi.meanOrMode(k);
        m_VarianceN[v][k] = nxi.variance(k);
      }

      for (int w=0,t=0; w < m_Dimension; w++,t++){
        if(!Double.isNaN(m_MeanN[v][w])){
          // effective weight: sum of instance weights where the value is present
          for(int u=0;u<nxi.numInstances();u++)
            if(!nxi.instance(u).isMissing(t))
              m_SumN[v][w] += nxi.instance(u).weight();
          numExsN[w]++;
          nSumVal[w] += m_MeanN[v][w];
          meanVarN[w] += m_MeanN[v][w]*m_MeanN[v][w];
          if(maxVarsN[w] < m_VarianceN[v][w])
            maxVarsN[w] = m_VarianceN[v][w];
          varMeanN[w] += m_VarianceN[v][w];
          // turn the sample variance into a (weighted) sum of squared deviations
          m_VarianceN[v][w] *= (m_SumN[v][w]-1.0);
          if(m_VarianceN[v][w] < 0.0)
            m_VarianceN[v][w] = 0.0;
        }
      }
    }

    // Finish the per-dimension statistics used as search start values
    for(int w=0; w<m_Dimension; w++){
      pSumVal[w] /= numExsP[w];
      nSumVal[w] /= numExsN[w];
      if(numExsP[w]>1)
        // NOTE(review): upstream WEKA computes the variance of bag means as
        // sum(m^2)/(n-1) - mean*mean*n/(n-1); the squared-mean factor looks
        // lost in this extraction — verify against the original source.
        meanVarP[w] = meanVarP[w]/(numExsP[w]-1.0)
          - pSumVal[w]*numExsP[w]/(numExsP[w]-1.0);
      if(numExsN[w]>1)
        // NOTE(review): same concern as the positive case above.
        meanVarN[w] = meanVarN[w]/(numExsN[w]-1.0)
          - nSumVal[w]*numExsN[w]/(numExsN[w]-1.0);
      varMeanP[w] /= numExsP[w];
      varMeanN[w] /= numExsN[w];
    }

    //Bounds and parameter values for each run
    double[][] bounds = new double[2][4];
    double[] pThisParam = new double[4],
      nThisParam = new double[4];

    // Initial values for parameters
    double a, b, w, m;

    // Optimize for one dimension
    for (int x=0; x < m_Dimension; x++){
      if (getDebug())
        System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #"+x);

      // Positive examplars: first run
      a = (maxVarsP[x]>ZERO) ? maxVarsP[x]:1.0;
      if (varMeanP[x]<=ZERO) varMeanP[x] = ZERO; // modified by LinDong (09/2005)
      b = a/varMeanP[x]+2.0; // a/(b-2) = E(\sigma^2)
      w = meanVarP[x]/varMeanP[x]; // E[var(\mu)] = w*E[\sigma^2]
      if(w<=ZERO) w=1.0;

      m = pSumVal[x];
      pThisParam[0] = a; // a
// [end of listing — code-viewer keyboard-shortcut chrome removed; the file is
//  truncated here: the remainder of buildClassifier() and the rest of class
//  TLD continue in the original WEKA source]