⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 smoreg.java

📁 Java 编写的多种数据挖掘算法 包括聚类、分类、预处理等
💻 JAVA
📖 第 1 页 / 共 4 页
字号:
/* *    This program is free software; you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation; either version 2 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program; if not, write to the Free Software *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* *    SMOreg.java *    Copyright (C) 2002 Sylvain Roy * */package weka.classifiers.functions;import weka.classifiers.Classifier;import weka.classifiers.Evaluation;import weka.classifiers.functions.supportVector.Kernel;import weka.classifiers.functions.supportVector.PolyKernel;import weka.classifiers.functions.supportVector.SMOset;import weka.core.Capabilities;import weka.core.Instance;import weka.core.Instances;import weka.core.Option;import weka.core.OptionHandler;import weka.core.SelectedTag;import weka.core.Tag;import weka.core.TechnicalInformation;import weka.core.TechnicalInformationHandler;import weka.core.Utils;import weka.core.WeightedInstancesHandler;import weka.core.Capabilities.Capability;import weka.core.TechnicalInformation.Field;import weka.core.TechnicalInformation.Type;import weka.filters.Filter;import weka.filters.unsupervised.attribute.NominalToBinary;import weka.filters.unsupervised.attribute.Normalize;import weka.filters.unsupervised.attribute.ReplaceMissingValues;import weka.filters.unsupervised.attribute.Standardize;import java.util.Enumeration;import java.util.Vector;/** <!-- globalinfo-start --> * Implements Alex Smola and Bernhard Scholkopf's sequential minimal optimization algorithm for training a support vector regression model. 
This implementation globally replaces all missing values and transforms nominal attributes into binary ones. It also normalizes all attributes by default. (Note that the coefficients in the output are based on the normalized/standardized data, not the original data.)<br/> * <br/> * For more information on the SMO algorithm, see<br/> * <br/> * Alex J. Smola, Bernhard Schoelkopf: A Tutorial on Support Vector Regression. In NeuroCOLT2 Technical Report Series, 1998.<br/> * <br/> * S.K. Shevade, S.S. Keerthi, C. Bhattacharyya, K.R.K. Murthy (1999). Improvements to SMO Algorithm for SVM Regression. Control Division Dept of Mechanical and Production Engineering, National University of Singapore. * <p/> <!-- globalinfo-end --> * <!-- technical-bibtex-start --> * BibTeX: * <pre> * &#64;incollection{Smola1998, *    author = {Alex J. Smola and Bernhard Schoelkopf}, *    booktitle = {NeuroCOLT2 Technical Report Series}, *    note = {NC2-TR-1998-030}, *    title = {A Tutorial on Support Vector Regression}, *    year = {1998} * } *  * &#64;techreport{Shevade1999, *    address = {Control Division Dept of Mechanical and Production Engineering, National University of Singapore}, *    author = {S.K. Shevade and S.S. Keerthi and C. Bhattacharyya and K.R.K. Murthy}, *    institution = {National University of Singapore}, *    note = {Technical Report CD-99-16}, *    title = {Improvements to SMO Algorithm for SVM Regression}, *    year = {1999} * } * </pre> * <p/> <!-- technical-bibtex-end --> * <!-- options-start --> * Valid options are: <p/> *  * <pre> -D *  If set, classifier is run in debug mode and *  may output additional info to the console</pre> *  * <pre> -no-checks *  Turns off all checks - use with caution! *  Turning them off assumes that data is purely numeric, doesn't *  contain any missing values, and has a nominal class. Turning them *  off also means that no header information will be stored if the *  machine is linear. 
Finally, it also assumes that no instance has *  a weight equal to 0. *  (default: checks on)</pre> *  * <pre> -S &lt;double&gt; *  The amount up to which deviations are *  tolerated (epsilon). (default 1e-3)</pre> *  * <pre> -C &lt;double&gt; *  The complexity constant C. (default 1)</pre> *  * <pre> -N *  Whether to 0=normalize/1=standardize/2=neither. (default 0=normalize)</pre> *  * <pre> -T &lt;double&gt; *  The tolerance parameter. (default 1.0e-3)</pre> *  * <pre> -P &lt;double&gt; *  The epsilon for round-off error. (default 1.0e-12)</pre> *  * <pre> -K &lt;classname and parameters&gt; *  The Kernel to use. *  (default: weka.classifiers.functions.supportVector.PolyKernel)</pre> *  * <pre>  * Options specific to kernel weka.classifiers.functions.supportVector.PolyKernel: * </pre> *  * <pre> -D *  Enables debugging output (if available) to be printed. *  (default: off)</pre> *  * <pre> -no-checks *  Turns off all checks - use with caution! *  (default: checks on)</pre> *  * <pre> -C &lt;num&gt; *  The size of the cache (a prime number). *  (default: 250007)</pre> *  * <pre> -E &lt;num&gt; *  The Exponent to use. *  (default: 1.0)</pre> *  * <pre> -L *  Use lower-order terms. *  (default: no)</pre> *  <!-- options-end --> * * @author Sylvain Roy (sro33@student.canterbury.ac.nz) * @version $Revision: 1.11 $ */public class SMOreg   extends Classifier   implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {  /** for serialization */  static final long serialVersionUID = 5783729368717679645L;      /** Kernel to use **/  protected Kernel m_kernel = new PolyKernel();  /** The class index from the training data */  protected int m_classIndex = -1;  /** The filter used to make attributes numeric. 
*/
  protected NominalToBinary m_NominalToBinary;

  /** filter type constant: normalize the training data (the default) */
  public static final int FILTER_NORMALIZE = 0;

  /** filter type constant: standardize the training data */
  public static final int FILTER_STANDARDIZE = 1;

  /** filter type constant: apply no normalization/standardization */
  public static final int FILTER_NONE = 2;

  /** The filter choices that can be applied to the training data */
  public static final Tag [] TAGS_FILTER = {
    new Tag(FILTER_NORMALIZE, "Normalize training data"),
    new Tag(FILTER_STANDARDIZE, "Standardize training data"),
    new Tag(FILTER_NONE, "No normalization/standardization"),
  };

  /** The filter used to standardize/normalize all values (null when no
      filtering is selected). */
  protected Filter m_Filter = null;

  /** Which of the FILTER_* modes to apply to the training data */
  protected int m_filterType = FILTER_NORMALIZE;

  /** The filter used to get rid of missing values. */
  protected ReplaceMissingValues m_Missing;

  /** Turn off all checks and conversions? Turning them off assumes
      that data is purely numeric, doesn't contain any missing values,
      and has a numeric class. Turning them off also means that
      no header information will be stored if the machine is linear.
      Finally, it also assumes that no instance has a weight equal to 0. */
  protected boolean m_checksTurnedOff = false;

  /** The training data. */
  protected Instances m_data;

  /** The complexity parameter C */
  protected double m_C = 1.0;

  /** The Lagrange multipliers */
  protected double[] m_alpha;
  protected double[] m_alpha_;

  /** The thresholds. */
  protected double m_b, m_bLow, m_bUp;

  /** The indices for m_bLow and m_bUp */
  protected int m_iLow, m_iUp;

  /** Weight vector for linear machine. */
  protected double[] m_weights;

  /** The current set of errors for all non-bound examples. */
  protected double[] m_fcache;

  /* The four different sets used by the algorithm.
*/  /** {i: 0 < m_alpha[i] < C || 0 < m_alpha_[i] < C} */  protected SMOset m_I0;   /** {i: m_class[i] = 0, m_alpha_[i] = 0} */  protected SMOset m_I1;   /** {i: m_class[i] = 0, m_alpha_[i] = C} */  protected SMOset m_I2;   /** {i: m_class[i] = C, m_alpha_[i] = 0} */  protected SMOset m_I3;   /** The parameter epsilon */  protected double m_epsilon = 1e-3;  /** The parameter tol */  protected double m_tol = 1.0e-3;  /** The parameter eps */  protected double m_eps = 1.0e-12;  /** Precision constant for updating sets */  protected static double m_Del = 1e-10;      /** The parameters of the linear transforamtion realized    * by the filter on the class attribute */  protected double m_Alin;  protected double m_Blin;  /** Variables to hold weight vector in sparse form.      (To reduce storage requirements.) */  protected double[] m_sparseWeights;  protected int[] m_sparseIndices;    /** whether the kernel is a linear one */  protected boolean m_KernelIsLinear = false;  /**   * Returns a string describing classifier   * @return a description suitable for   * displaying in the explorer/experimenter gui   */  public String globalInfo() {    return  "Implements Alex Smola and Bernhard Scholkopf's sequential minimal "      + "optimization algorithm for training a support vector regression model. "      + "This implementation globally replaces all missing values and "      + "transforms nominal attributes into binary ones. It also "      + "normalizes all attributes by default. (Note that the coefficients "      + "in the output are based on the normalized/standardized data, not the "      + "original data.)\n\n"      + "For more information on the SMO algorithm, see\n\n"      + getTechnicalInformation().toString();  }  /**   * Returns an instance of a TechnicalInformation object, containing    * detailed information about the technical background of this class,   * e.g., paper reference or book this class is based on.   
*
   * @return the technical information about this class
   */
  public TechnicalInformation getTechnicalInformation() {
    // Primary reference: Smola & Schoelkopf's tutorial on SVR.
    TechnicalInformation result = new TechnicalInformation(Type.INCOLLECTION);
    result.setValue(Field.AUTHOR, "Alex J. Smola and Bernhard Schoelkopf");
    result.setValue(Field.YEAR, "1998");
    result.setValue(Field.TITLE, "A Tutorial on Support Vector Regression");
    result.setValue(Field.BOOKTITLE, "NeuroCOLT2 Technical Report Series");
    result.setValue(Field.NOTE, "NC2-TR-1998-030");

    // Additional reference: Shevade et al.'s improvements to SMO for SVR.
    TechnicalInformation shevade = result.add(Type.TECHREPORT);
    shevade.setValue(Field.AUTHOR, "S.K. Shevade and S.S. Keerthi and C. Bhattacharyya and K.R.K. Murthy");
    shevade.setValue(Field.YEAR, "1999");
    shevade.setValue(Field.TITLE, "Improvements to SMO Algorithm for SVM Regression");
    shevade.setValue(Field.INSTITUTION, "National University of Singapore");
    shevade.setValue(Field.ADDRESS, "Control Division Dept of Mechanical and Production Engineering, National University of Singapore");
    shevade.setValue(Field.NOTE, "Technical Report CD-99-16");

    return result;
  }

  /**
   * Returns default capabilities of the classifier.
*
   * @return      the capabilities of this classifier
   */
  public Capabilities getCapabilities() {
    // Start from the kernel's capabilities: this classifier can only
    // handle what the chosen kernel can handle.
    Capabilities result = getKernel().getCapabilities();
    result.setOwner(this);

    // attribute
    result.enableAllAttributeDependencies();
    // with NominalToBinary we can also handle nominal attributes, but only
    // if the kernel can handle numeric attributes
    if (result.handles(Capability.NUMERIC_ATTRIBUTES))
      result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class: regression settings only (numeric or date class)
    result.disableAllClasses();
    result.disableAllClassDependencies();
    result.enable(Capability.NUMERIC_CLASS);
    result.enable(Capability.DATE_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }

  /**
   * Method for building the classifier.
   *
   * @param insts the set of training instances
   * @throws Exception if the classifier can't be built successfully
   */
  public void buildClassifier(Instances insts) throws Exception {

    /* check the set of training instances */
    if (!m_checksTurnedOff) {
      // can classifier handle the data?
      getCapabilities().testWithFail(insts);

      // remove instances with missing class
      insts = new Instances(insts);
      insts.deleteWithMissingClass();

      /* Removes all the instances with weight equal to 0.
       MUST be done since condition (6) of Shevade's paper
       is made with the assertion Ci > 0 (See equation (1a).
*/      Instances data = new Instances(insts, insts.numInstances());      for(int i = 0; i < insts.numInstances(); i++){        if(insts.instance(i).weight() > 0)          data.add(insts.instance(i));      }      if (data.numInstances() == 0) {        throw new Exception("No training instances left after removing " +         "instances with weight 0!");      }      insts = data;    }    if (!m_checksTurnedOff) {      m_Missing = new ReplaceMissingValues();      m_Missing.setInputFormat(insts);      insts = Filter.useFilter(insts, m_Missing);     } else {      m_Missing = null;    }    if (getCapabilities().handles(Capability.NUMERIC_ATTRIBUTES)) {      boolean onlyNumeric = true;      if (!m_checksTurnedOff) {	for (int i = 0; i < insts.numAttributes(); i++) {	  if (i != insts.classIndex()) {	    if (!insts.attribute(i).isNumeric()) {	      onlyNumeric = false;	      break;	    }	  }	}      }            if (!onlyNumeric) {	m_NominalToBinary = new NominalToBinary();	m_NominalToBinary.setInputFormat(insts);	insts = Filter.useFilter(insts, m_NominalToBinary);      }       else {	m_NominalToBinary = null;      }    }    else {      m_NominalToBinary = null;    }    m_classIndex = insts.classIndex();    if (m_filterType == FILTER_STANDARDIZE) {      m_Filter = new Standardize();      ((Standardize)m_Filter).setIgnoreClass(true);      m_Filter.setInputFormat(insts);      insts = Filter.useFilter(insts, m_Filter);     } else if (m_filterType == FILTER_NORMALIZE) {      m_Filter = new Normalize();      ((Normalize)m_Filter).setIgnoreClass(true);      m_Filter.setInputFormat(insts);      insts = Filter.useFilter(insts, m_Filter);     } else {      m_Filter = null;    }    m_data = insts;    // determine which linear transformation has been     // applied to the class by the filter    if (m_Filter != null) {      Instance witness = (Instance)insts.instance(0).copy();      witness.setValue(m_classIndex, 0);      m_Filter.input(witness);      m_Filter.batchFinished();      
Instance res = m_Filter.output();      m_Blin = res.value(m_classIndex);      witness.setValue(m_classIndex, 1);      m_Filter.input(witness);      m_Filter.batchFinished();      res = m_Filter.output();      m_Alin = res.value(m_classIndex) - m_Blin;    } else {      m_Alin = 1.0;      m_Blin = 0.0;    }    // Initialize kernel    m_kernel.buildKernel(m_data);    m_KernelIsLinear = (m_kernel instanceof PolyKernel) && (((PolyKernel) m_kernel).getExponent() == 1.0);	    // If machine is linear, reserve space for weights    if (m_KernelIsLinear) {      m_weights = new double[m_data.numAttributes()];    } else {      m_weights = null;    }    // Initialize fcache    m_fcache = new double[m_data.numInstances()];    // Initialize sets    m_I0 = new SMOset(m_data.numInstances());    m_I1 = new SMOset(m_data.numInstances());    m_I2 = new SMOset(m_data.numInstances());    m_I3 = new SMOset(m_data.numInstances());    /* MAIN ROUTINE FOR MODIFICATION 1 */    // Follows the specification of the first modification of Shevade's paper 		    // Initialize alpha array to zero    m_alpha = new double[m_data.numInstances()];    m_alpha_ = new double[m_data.numInstances()];	    // set I1 to contain all the examples    for(int i = 0; i < m_data.numInstances(); i++){      m_I1.insert(i);    }	    // choose any example i from the training set : i = 0     m_bUp = m_data.instance(0).classValue() + m_epsilon;    m_bLow = m_data.instance(0).classValue() - m_epsilon;    m_iUp = m_iLow = 0;	    int numChanged = 0;    boolean examineAll = true;    while(numChanged > 0 || examineAll){      numChanged = 0;      if(examineAll){	// loop over all the example	for(int I = 0; I < m_alpha.length; I++){	  numChanged += examineExample(I);	}      } else {	// loop over I_0	for (int I = m_I0.getNext(-1); I != -1; I = m_I0.getNext(I)) {	  numChanged += examineExample(I);	  if(m_bUp > m_bLow - 2 * m_tol){	    numChanged = 0;	    break;	  }	}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -