// TLD.java — source listing (code-viewer page header removed)
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * TLD.java * Copyright (C) 2005 University of Waikato, Hamilton, New Zealand * */package weka.classifiers.mi;import weka.classifiers.RandomizableClassifier;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.MultiInstanceCapabilitiesHandler;import weka.core.Optimization;import weka.core.Option;import weka.core.OptionHandler;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import java.util.Enumeration;import java.util.Random;import java.util.Vector;/** <!-- globalinfo-start --> * Two-Level Distribution approach, changes the starting value of the searching algorithm, supplement the cut-off modification and check missing values.<br/> * <br/> * For more information see:<br/> * <br/> * Xin Xu (2003). Statistical learning in multiple instance problem. Hamilton, NZ. 
 * <p/> <!-- globalinfo-end -->
 *
 * <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * @mastersthesis{Xu2003,
 *    address = {Hamilton, NZ},
 *    author = {Xin Xu},
 *    note = {0657.594},
 *    school = {University of Waikato},
 *    title = {Statistical learning in multiple instance problem},
 *    year = {2003}
 * }
 * </pre>
 * <p/> <!-- technical-bibtex-end -->
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -C
 *  Set whether or not use empirical
 *  log-odds cut-off instead of 0</pre>
 *
 * <pre> -R <numOfRuns>
 *  Set the number of multiple runs
 *  needed for searching the MLE.</pre>
 *
 * <pre> -S <num>
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 * <!-- options-end -->
 *
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author Xin Xu (xx5@cs.waikato.ac.nz)
 * @version $Revision: 1.5 $
 */
public class TLD
  extends RandomizableClassifier
  implements OptionHandler, MultiInstanceCapabilitiesHandler,
             TechnicalInformationHandler {

  /** for serialization */
  static final long serialVersionUID = 6657315525171152210L;

  /** The mean for each attribute of each positive exemplar */
  protected double[][] m_MeanP = null;

  /** The variance for each attribute of each positive exemplar */
  protected double[][] m_VarianceP = null;

  /** The mean for each attribute of each negative exemplar */
  protected double[][] m_MeanN = null;

  /** The variance for each attribute of each negative exemplar */
  protected double[][] m_VarianceN = null;

  /** The effective sum of weights of each positive exemplar in each dimension */
  protected double[][] m_SumP = null;

  /** The effective sum of weights of each negative exemplar in each dimension */
  protected double[][] m_SumN = null;

  /** The parameters to be estimated for each positive exemplar */
  protected double[] m_ParamsP = null;

  /** The parameters to be estimated for each negative exemplar */
  protected double[] m_ParamsN = null;

  /** The dimension of each exemplar, i.e. (numAttributes-2) */
  protected int m_Dimension = 0;

  /** The class label of each exemplar */
  protected double[] m_Class = null;

  /** The number of class labels in the data */
  protected int m_NumClasses = 2;

  /** The very small number representing zero */
  static public double ZERO = 1.0e-6;

  /** The number of runs to perform */
  protected int m_Run = 1;

  /** Classification cut-off; presumably the empirical log-odds threshold
      selected when -C is set — TODO confirm against the rest of the class */
  protected double m_Cutoff;

  /** Whether to use an empirical log-odds cut-off instead of 0 (-C option) */
  protected boolean m_UseEmpiricalCutOff = false;

  /**
   * Returns a string describing this filter
   *
   * @return a description of the filter suitable for
   * displaying in the explorer/experimenter gui
   */
  public String globalInfo() {
    return
        "Two-Level Distribution approach, changes the starting value of "
      + "the searching algorithm, supplement the cut-off modification and "
      + "check missing values.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   *
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation result;

    result = new TechnicalInformation(Type.MASTERSTHESIS);
    result.setValue(Field.AUTHOR, "Xin Xu");
    result.setValue(Field.YEAR, "2003");
    result.setValue(Field.TITLE, "Statistical learning in multiple instance problem");
    result.setValue(Field.SCHOOL, "University of Waikato");
    result.setValue(Field.ADDRESS, "Hamilton, NZ");
    result.setValue(Field.NOTE, "0657.594");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.RELATIONAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.BINARY_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    // other
    result.enable(Capability.ONLY_MULTIINSTANCE);

    return result;
  }

  /**
   * Returns the capabilities of this multi-instance classifier for the
   * relational data.
   *
   * @return the capabilities of this object
   * @see Capabilities
   */
  public Capabilities getMultiInstanceCapabilities() {
    Capabilities result = super.getCapabilities();

    // attributes
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.disableAllClasses();
    result.enable(Capability.NO_CLASS);

    return result;
  }

  /**
   * Builds the classifier from the given training exemplars (bags).
   *
   * @param exs the training exemplars
   * @throws Exception if the model cannot be built properly
   */
  public void buildClassifier(Instances exs)throws Exception{
    // can classifier handle the data?
    getCapabilities().testWithFail(exs);

    // remove instances with missing class
    exs = new Instances(exs);
    exs.deleteWithMissingClass();

    int numegs = exs.numInstances();
    // dimension = number of attributes inside the relational (bag) attribute;
    // NOTE(review): this statement continues on the next source line
    m_Dimension = exs.attribute(1).relation().
      numAttributes(); // completes: m_Dimension = exs.attribute(1).relation().numAttributes();

    // Partition the exemplars into positive (classValue == 1) and negative bags
    Instances pos = new Instances(exs, 0), neg = new Instances(exs, 0);
    for(int u=0; u<numegs; u++){
      Instance example = exs.instance(u);
      if(example.classValue() == 1)
        pos.add(example);
      else
        neg.add(example);
    }

    int pnum = pos.numInstances(),
        nnum = neg.numInstances();

    m_MeanP = new double[pnum][m_Dimension];
    m_VarianceP = new double[pnum][m_Dimension];
    m_SumP = new double[pnum][m_Dimension];
    m_MeanN = new double[nnum][m_Dimension];
    m_VarianceN = new double[nnum][m_Dimension];
    m_SumN = new double[nnum][m_Dimension];
    m_ParamsP = new double[4*m_Dimension];
    m_ParamsN = new double[4*m_Dimension];

    // Estimation of the parameters: as the start value for search
    double[] pSumVal=new double[m_Dimension], // for m
      nSumVal=new double[m_Dimension];
    double[] maxVarsP=new double[m_Dimension], // for a
      maxVarsN=new double[m_Dimension];
    // Mean of sample variances: for b, b=a/E(\sigma^2)+2
    double[] varMeanP = new double[m_Dimension],
      varMeanN = new double[m_Dimension];
    // Variances of sample means: for w, w=E[var(\mu)]/E[\sigma^2]
    double[] meanVarP = new double[m_Dimension],
      meanVarN = new double[m_Dimension];
    // number of exemplars without all values missing
    double[] numExsP = new double[m_Dimension],
      numExsN = new double[m_Dimension];

    // Extract metadata for both positive and negative bags
    for(int v=0; v < pnum; v++){
      // per-bag mean and variance of every attribute in the relational value
      Instances pxi = pos.instance(v).relationalValue(1);
      for (int k=0; k<pxi.numAttributes(); k++) {
        m_MeanP[v][k] = pxi.meanOrMode(k);
        m_VarianceP[v][k] = pxi.variance(k);
      }

      for (int w=0,t=0; w < m_Dimension; w++,t++){
        if(!Double.isNaN(m_MeanP[v][w])){
          // effective weight: sum of instance weights where the value is present
          for(int u=0;u<pxi.numInstances();u++){
            Instance ins = pxi.instance(u);
            if(!ins.isMissing(t))
              m_SumP[v][w] += ins.weight();
          }
          numExsP[w]++;
          pSumVal[w] += m_MeanP[v][w];
          meanVarP[w] += m_MeanP[v][w]*m_MeanP[v][w];
          if(maxVarsP[w] < m_VarianceP[v][w])
            maxVarsP[w] = m_VarianceP[v][w];
          varMeanP[w] += m_VarianceP[v][w];
          // turn the sample variance into a (weighted) sum of squared deviations
          m_VarianceP[v][w] *= (m_SumP[v][w]-1.0);
          if(m_VarianceP[v][w] < 0.0)
            m_VarianceP[v][w] = 0.0;
        }
      }
    }

    for(int v=0; v < nnum; v++){
      // per-bag mean and variance of every attribute in the relational value
      Instances nxi = neg.instance(v).relationalValue(1);
      for (int k=0; k<nxi.numAttributes(); k++) {
        m_MeanN[v][k] = nxi.meanOrMode(k);
        m_VarianceN[v][k] = nxi.variance(k);
      }

      for (int w=0,t=0; w < m_Dimension; w++,t++){
        if(!Double.isNaN(m_MeanN[v][w])){
          // effective weight: sum of instance weights where the value is present
          for(int u=0;u<nxi.numInstances();u++)
            if(!nxi.instance(u).isMissing(t))
              m_SumN[v][w] += nxi.instance(u).weight();
          numExsN[w]++;
          nSumVal[w] += m_MeanN[v][w];
          meanVarN[w] += m_MeanN[v][w]*m_MeanN[v][w];
          if(maxVarsN[w] < m_VarianceN[v][w])
            maxVarsN[w] = m_VarianceN[v][w];
          varMeanN[w] += m_VarianceN[v][w];
          // turn the sample variance into a (weighted) sum of squared deviations
          m_VarianceN[v][w] *= (m_SumN[v][w]-1.0);
          if(m_VarianceN[v][w] < 0.0)
            m_VarianceN[v][w] = 0.0;
        }
      }
    }

    // Finish the per-dimension statistics used as search start values
    for(int w=0; w<m_Dimension; w++){
      pSumVal[w] /= numExsP[w];
      nSumVal[w] /= numExsN[w];
      if(numExsP[w]>1)
        // NOTE(review): upstream WEKA computes the variance of bag means as
        // sum(m^2)/(n-1) - mean*mean*n/(n-1); the squared-mean factor looks
        // lost in this extraction — verify against the original source.
        meanVarP[w] = meanVarP[w]/(numExsP[w]-1.0)
          - pSumVal[w]*numExsP[w]/(numExsP[w]-1.0);
      if(numExsN[w]>1)
        // NOTE(review): same concern as the positive case above.
        meanVarN[w] = meanVarN[w]/(numExsN[w]-1.0)
          - nSumVal[w]*numExsN[w]/(numExsN[w]-1.0);
      varMeanP[w] /= numExsP[w];
      varMeanN[w] /= numExsN[w];
    }

    //Bounds and parameter values for each run
    double[][] bounds = new double[2][4];
    double[] pThisParam = new double[4],
      nThisParam = new double[4];

    // Initial values for parameters
    double a, b, w, m;

    // Optimize for one dimension
    for (int x=0; x < m_Dimension; x++){
      if (getDebug())
        System.err.println("\n\n!!!!!!!!!!!!!!!!!!!!!!???Dimension #"+x);

      // Positive examplars: first run
      a = (maxVarsP[x]>ZERO) ? maxVarsP[x]:1.0;
      if (varMeanP[x]<=ZERO) varMeanP[x] = ZERO; // modified by LinDong (09/2005)
      b = a/varMeanP[x]+2.0; // a/(b-2) = E(\sigma^2)
      w = meanVarP[x]/varMeanP[x]; // E[var(\mu)] = w*E[\sigma^2]
      if(w<=ZERO) w=1.0;

      m = pSumVal[x];
      pThisParam[0] = a; // a
// [end of listing — code-viewer keyboard-shortcut chrome removed; the file is
//  truncated here: the remainder of buildClassifier() and the rest of class
//  TLD continue in the original WEKA source]