TLDSimple.java
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * TLDSimple.java
 * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand
 */

package weka.classifiers.mi;

import weka.classifiers.RandomizableClassifier;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.MultiInstanceCapabilitiesHandler;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 <!-- globalinfo-start -->
 * A simpler version of TLD, mu random but sigma^2 fixed and estimated via data.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Xin Xu (2003). Statistical learning in multiple instance problem. Hamilton, NZ.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @mastersthesis{Xu2003,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.594},
 *    school = {University of Waikato},
 *    title = {Statistical learning in multiple instance problem},
 *    year = {2003}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C
 *  Set whether or not use empirical
 *  log-odds cut-off instead of 0</pre>
 *
 * <pre> -R <numOfRuns>
 *  Set the number of multiple runs
 *  needed for searching the MLE.</pre>
 *
 * <pre> -S <num>
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.5 $
 */
public class TLDSimple
  extends RandomizableClassifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 9040995947243286591L;

  /** The mean for each attribute of each positive exemplar */
  protected double[][] m_MeanP = null;

  /** The mean for each attribute of each negative exemplar */
  protected double[][] m_MeanN = null;

  /** The effective sum of weights of each positive exemplar in each dimension */
  protected double[][] m_SumP = null;

  /** The effective sum of weights of each negative exemplar in each dimension */
  protected double[][] m_SumN = null;

  /** Estimated sigma^2 in positive bags */
  protected double[] m_SgmSqP;

  /** Estimated sigma^2 in negative bags */
  protected double[] m_SgmSqN;

  /** The parameters to be estimated for each positive exemplar */
  protected double[] m_ParamsP = null;

  /** The parameters to be estimated for each negative exemplar */
  protected double[] m_ParamsN = null;
  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 2;

  /** The very small number representing zero */
  static public double ZERO = 1.0e-12;

  protected int m_Run = 1;

  protected double m_Cutoff;

  protected boolean m_UseEmpiricalCutOff = false;

  private double[] m_LkRatio;

  private Instances m_Attribute = null;

  /**
   * Returns a string describing this classifier
   *
   * @return a description of the classifier suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "A simpler version of TLD, mu random but sigma^2 fixed and estimated "
      + "via data.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MASTERSTHESIS);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.TITLE, "Statistical learning in multiple instance problem");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.594");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.BINARY_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return the capabilities of this object
   * @see Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }
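
  /*
   * Overview of the training procedure below (added commentary summarizing
   * the code, not part of the original file): buildClassifier() first splits
   * the bags into positive and negative groups and collects per-bag,
   * per-dimension statistics (mean, variance, effective weight). It then
   * pools the within-bag variances into one fixed sigma^2 per dimension and
   * group, and finally, dimension by dimension, searches for the
   * maximum-likelihood estimates of the two remaining parameters (w, m) of
   * the distribution over the bag means, using m_Run randomly
   * re-initialized runs of the TLDSimple_Optm optimizer.
   */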

  /**
   * Builds the classifier.
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */
  public void buildClassifier(Instances exs) throws Exception {
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    exs = new Instances(exs);
    exs.deleteWithMissingClass();

    int numegs = exs.numInstances();
    m_Dimension = exs.attribute(1).relation().numAttributes();
    m_Attribute = exs.attribute(1).relation().stringFreeStructure();
    Instances pos = new Instances(exs, 0), neg = new Instances(exs, 0);

    // Divide into two groups
    for (int u = 0; u < numegs; u++) {
      Instance example = exs.instance(u);
      if (example.classValue() == 1)
        pos.add(example);
      else
        neg.add(example);
    }

    int pnum = pos.numInstances(), nnum = neg.numInstances();

    // xBar, n
    m_MeanP = new double[pnum][m_Dimension];
    m_SumP = new double[pnum][m_Dimension];
    m_MeanN = new double[nnum][m_Dimension];
    m_SumN = new double[nnum][m_Dimension];

    // w, m
    m_ParamsP = new double[2 * m_Dimension];
    m_ParamsN = new double[2 * m_Dimension];

    // \sigma^2
    m_SgmSqP = new double[m_Dimension];
    m_SgmSqN = new double[m_Dimension];

    // S^2
    double[][] varP = new double[pnum][m_Dimension],
               varN = new double[nnum][m_Dimension];

    // numOfEx 'e' without all missing
    double[] effNumExP = new double[m_Dimension],
             effNumExN = new double[m_Dimension];

    // For the starting values
    double[] pMM = new double[m_Dimension],
             nMM = new double[m_Dimension],
             pVM = new double[m_Dimension],
             nVM = new double[m_Dimension];

    // # of exemplars with only one instance
    double[] numOneInsExsP = new double[m_Dimension],
             numOneInsExsN = new double[m_Dimension];

    // sum_i(1/n_i)
    double[] pInvN = new double[m_Dimension],
             nInvN = new double[m_Dimension];

    // Extract metadata from both positive and negative bags
    for (int v = 0; v < pnum; v++) {
      //Instance px = pos.instance(v);
      Instances pxi = pos.instance(v).relationalValue(1);
      for (int k = 0; k < pxi.numAttributes(); k++) {
        m_MeanP[v][k] = pxi.meanOrMode(k);
        varP[v][k] = pxi.variance(k);
      }

      for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
        //if((t==m_ClassIndex) || (t==m_IdIndex))
        //  t++;
        if (varP[v][w] <= 0.0)
          varP[v][w] = 0.0;

        if (!Double.isNaN(m_MeanP[v][w])) {
          for (int u = 0; u < pxi.numInstances(); u++)
            if (!pxi.instance(u).isMissing(t))
              m_SumP[v][w] += pxi.instance(u).weight();

          pMM[w] += m_MeanP[v][w];
          pVM[w] += m_MeanP[v][w] * m_MeanP[v][w];
          if ((m_SumP[v][w] > 1) && (varP[v][w] > ZERO)) {
            m_SgmSqP[w] += varP[v][w] * (m_SumP[v][w] - 1.0) / m_SumP[v][w];
            //m_SgmSqP[w] += varP[v][w]*(m_SumP[v][w]-1.0);
            effNumExP[w]++; // Not count exemplars with 1 instance
            pInvN[w] += 1.0 / m_SumP[v][w];
            //pInvN[w] += m_SumP[v][w];
          }
          else
            numOneInsExsP[w]++;
        }
      }
    }

    for (int v = 0; v < nnum; v++) {
      //Instance nx = neg.instance(v);
      Instances nxi = neg.instance(v).relationalValue(1);
      for (int k = 0; k < nxi.numAttributes(); k++) {
        m_MeanN[v][k] = nxi.meanOrMode(k);
        varN[v][k] = nxi.variance(k);
      }
      //Instances nxi = nx.getInstances();

      for (int w = 0, t = 0; w < m_Dimension; w++, t++) {
        //if((t==m_ClassIndex) || (t==m_IdIndex))
        //  t++;
        if (varN[v][w] <= 0.0)
          varN[v][w] = 0.0;

        if (!Double.isNaN(m_MeanN[v][w])) {
          for (int u = 0; u < nxi.numInstances(); u++)
            if (!nxi.instance(u).isMissing(t))
              m_SumN[v][w] += nxi.instance(u).weight();

          nMM[w] += m_MeanN[v][w];
          nVM[w] += m_MeanN[v][w] * m_MeanN[v][w];
          if ((m_SumN[v][w] > 1) && (varN[v][w] > ZERO)) {
            m_SgmSqN[w] += varN[v][w] * (m_SumN[v][w] - 1.0) / m_SumN[v][w];
            //m_SgmSqN[w] += varN[v][w]*(m_SumN[v][w]-1.0);
            effNumExN[w]++; // Not count exemplars with 1 instance
            nInvN[w] += 1.0 / m_SumN[v][w];
            //nInvN[w] += m_SumN[v][w];
          }
          else
            numOneInsExsN[w]++;
        }
      }
    }

    // Expected \sigma^2
    /* if m_SgmSqP[u] or m_SgmSqN[u] is 0, assign 0 to sigma^2.
     * Otherwise, may cause k m_SgmSqP / m_SgmSqN to be NaN.
     * Modified by Lin Dong (Sep. 2005)
     */
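    /*
     * Added commentary: the loop below completes the pooled variance
     * estimate accumulated above. Per dimension and group it computes
     *
     *   sigma^2 = [ sum_i S_i^2 * (n_i - 1) / n_i ] / ( E - sum_i 1/n_i )
     *
     * where S_i^2 is the sample variance and n_i the effective weight of
     * bag i, and E is the number of bags with more than one (non-missing)
     * instance. Bags with a single instance contribute nothing to sigma^2
     * but are added back into the effective bag counts before the
     * method-of-moments starting values (pMM/pVM, nMM/nVM) are finalized.
     */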
    for (int u = 0; u < m_Dimension; u++) {
      // For exemplars with only one instance, use avg(\sigma^2) of other exemplars
      if (m_SgmSqP[u] != 0)
        m_SgmSqP[u] /= (effNumExP[u] - pInvN[u]);
      else
        m_SgmSqP[u] = 0;
      if (m_SgmSqN[u] != 0)
        m_SgmSqN[u] /= (effNumExN[u] - nInvN[u]);
      else
        m_SgmSqN[u] = 0;

      //m_SgmSqP[u] /= (pInvN[u]-effNumExP[u]);
      //m_SgmSqN[u] /= (nInvN[u]-effNumExN[u]);
      effNumExP[u] += numOneInsExsP[u];
      effNumExN[u] += numOneInsExsN[u];
      pMM[u] /= effNumExP[u];
      nMM[u] /= effNumExN[u];
      pVM[u] = pVM[u] / (effNumExP[u] - 1.0)
        - pMM[u] * pMM[u] * effNumExP[u] / (effNumExP[u] - 1.0);
      nVM[u] = nVM[u] / (effNumExN[u] - 1.0)
        - nMM[u] * nMM[u] * effNumExN[u] / (effNumExN[u] - 1.0);
    }

    // Bounds and parameter values for each run
    double[][] bounds = new double[2][2];
    double[] pThisParam = new double[2],
             nThisParam = new double[2];

    // Initial values for parameters
    double w, m;
    Random whichEx = new Random(m_Seed);

    // Optimize for one dimension
    for (int x = 0; x < m_Dimension; x++) {
      // System.out.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #"+x);

      // Positive exemplars: first run
      pThisParam[0] = pVM[x]; // w
      if (pThisParam[0] <= ZERO)
        pThisParam[0] = 1.0;
      pThisParam[1] = pMM[x]; // m

      // Negative exemplars: first run
      nThisParam[0] = nVM[x]; // w
      if (nThisParam[0] <= ZERO)
        nThisParam[0] = 1.0;
      nThisParam[1] = nMM[x]; // m

      // Bound constraints
      bounds[0][0] = ZERO; // w > 0
      bounds[0][1] = Double.NaN;
      bounds[1][0] = Double.NaN;
      bounds[1][1] = Double.NaN;

      double pminVal = Double.MAX_VALUE, nminVal = Double.MAX_VALUE;
      TLDSimple_Optm pOp = null, nOp = null;
      boolean isRunValid = true;
      double[] sumP = new double[pnum], meanP = new double[pnum];
      double[] sumN = new double[nnum], meanN = new double[nnum];

      // One dimension
      for (int p = 0; p < pnum; p++) {
        sumP[p] = m_SumP[p][x];
        meanP[p] = m_MeanP[p][x];
      }
      for (int q = 0; q < nnum; q++) {
        sumN[q] = m_SumN[q][x];
        meanN[q] = m_MeanN[q][x];
      }

      for (int y = 0; y < m_Run; y++) {
        //System.out.println("\n\n!!!!!!!!!Positive exemplars: Run #"+y);
        double thisMin;

        pOp = new TLDSimple_Optm();
        pOp.setNum(sumP);
        pOp.setSgmSq(m_SgmSqP[x]);
        if (getDebug())
          System.out.println("m_SgmSqP[" + x + "]= " + m_SgmSqP[x]);
        pOp.setXBar(meanP);
        //pOp.setDebug(true);
        pThisParam = pOp.findArgmin(pThisParam, bounds);
        while (pThisParam == null) {
          pThisParam = pOp.getVarbValues();
          if (getDebug())
            System.out.println("!!! 200 iterations finished, not enough!");
          pThisParam = pOp.findArgmin(pThisParam, bounds);
        }

        thisMin = pOp.getMinFunction();
        if (!Double.isNaN(thisMin) && (thisMin < pminVal)) {
          pminVal = thisMin;
          for (int z = 0; z < 2; z++)
            m_ParamsP[2 * x + z] = pThisParam[z];
        }

        if (Double.isNaN(thisMin)) {
          pThisParam = new double[2];
          isRunValid = false;
        }
        if (!isRunValid) {
          y--;
          isRunValid = true;
        }

        // Change the initial parameters and restart
        int pone = whichEx.nextInt(pnum); // Positive exemplars: next run
        while (Double.isNaN(m_MeanP[pone][x]))
          pone = whichEx.nextInt(pnum);

        m = m_MeanP[pone][x];
        w = (m - pThisParam[1]) * (m - pThisParam[1]);
        pThisParam[0] = w; // w
        pThisParam[1] = m; // m
      }
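
      /*
       * Added commentary: the same multi-restart MLE search is now repeated
       * for the negative bags. Each run keeps the (w, m) pair with the
       * smallest objective value seen so far, then re-initializes m from
       * the mean of a randomly chosen exemplar and w from the squared
       * distance between that mean and the previous estimate of m. Runs
       * whose objective evaluates to NaN are discarded and repeated.
       */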
      for (int y = 0; y < m_Run; y++) {
        //System.out.println("\n\n!!!!!!!!!Negative exemplars: Run #"+y);
        double thisMin;

        nOp = new TLDSimple_Optm();
        nOp.setNum(sumN);
        nOp.setSgmSq(m_SgmSqN[x]);
        if (getDebug())
          System.out.println(m_SgmSqN[x]);
        nOp.setXBar(meanN);
        //nOp.setDebug(true);
        nThisParam = nOp.findArgmin(nThisParam, bounds);
        while (nThisParam == null) {
          nThisParam = nOp.getVarbValues();
          if (getDebug())
            System.out.println("!!! 200 iterations finished, not enough!");
          nThisParam = nOp.findArgmin(nThisParam, bounds);
        }

        thisMin = nOp.getMinFunction();
        if (!Double.isNaN(thisMin) && (thisMin < nminVal)) {
          nminVal = thisMin;
          for (int z = 0; z < 2; z++)
            m_ParamsN[2 * x + z] = nThisParam[z];
        }

        if (Double.isNaN(thisMin)) {
          nThisParam = new double[2];
          isRunValid = false;
        }
        if (!isRunValid) {
          y--;
          isRunValid = true;
        }

        // Change the initial parameters and restart
        int none = whichEx.nextInt(nnum); // Randomly pick one neg. exemplar
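
  /*
   * A minimal usage sketch (illustration only, not part of this file; the
   * ARFF file name is a hypothetical placeholder). TLDSimple expects data
   * in Weka's multi-instance bag format: a bag-id attribute, a relational
   * attribute holding each bag's instances, and a binary class attribute,
   * with options as documented in the class Javadoc above.
   *
   *   import weka.core.Instances;
   *   import weka.core.converters.ConverterUtils.DataSource;
   *
   *   Instances data = DataSource.read("musk1-bags.arff"); // hypothetical path
   *   data.setClassIndex(data.numAttributes() - 1);
   *
   *   TLDSimple tld = new TLDSimple();
   *   // -C: empirical log-odds cut-off, -R: number of runs, -S: seed
   *   tld.setOptions(new String[]{"-C", "-R", "10", "-S", "1"});
   *   tld.buildClassifier(data);
   *
   *   double[] dist = tld.distributionForInstance(data.instance(0));
   */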